blob: a1caac0665c72bdfc5f6062e0f294ab8ff6085f0 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Tabular Datasets &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'python/dataset';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/python/dataset.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Arrow Flight RPC" href="flight.html" />
<link rel="prev" title="Reading and Writing the Apache Parquet Format" href="parquet.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="install.html">Installing PyArrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="getstarted.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="data.html">Data Types and In-Memory Data Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="memory.html">Memory and IO Interfaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="ipc.html">Streaming, Serialization, and IPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="filesystems.html">Filesystem Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="numpy.html">NumPy Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="pandas.html">Pandas Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="interchange_protocol.html">Dataframe Interchange Protocol</a></li>
<li class="toctree-l1"><a class="reference internal" href="dlpack.html">The DLPack Protocol</a></li>
<li class="toctree-l1"><a class="reference internal" href="timestamps.html">Timestamps</a></li>
<li class="toctree-l1"><a class="reference internal" href="orc.html">Reading and Writing the Apache ORC Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l1"><a class="reference internal" href="feather.html">Feather File Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="json.html">Reading JSON files</a></li>
<li class="toctree-l1"><a class="reference internal" href="parquet.html">Reading and Writing the Apache Parquet Format</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Tabular Datasets</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending_types.html">Extending pyarrow</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="integration.html">PyArrow Integrations</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="integration/python_r.html">Integrating PyArrow with R</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/python_java.html">Integrating PyArrow with Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/extending.html">Using pyarrow from C++ and Cython Code</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/cuda.html">CUDA Integration</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="env_vars.html">Environment Variables</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/datatypes.html">Data Types and Schemas</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.null.html">pyarrow.null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.bool_.html">pyarrow.bool_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int8.html">pyarrow.int8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int16.html">pyarrow.int16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int32.html">pyarrow.int32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int64.html">pyarrow.int64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint8.html">pyarrow.uint8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint16.html">pyarrow.uint16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint32.html">pyarrow.uint32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint64.html">pyarrow.uint64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float16.html">pyarrow.float16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float32.html">pyarrow.float32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float64.html">pyarrow.float64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.time32.html">pyarrow.time32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.time64.html">pyarrow.time64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.timestamp.html">pyarrow.timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.date32.html">pyarrow.date32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.date64.html">pyarrow.date64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.duration.html">pyarrow.duration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.month_day_nano_interval.html">pyarrow.month_day_nano_interval</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.binary.html">pyarrow.binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.string.html">pyarrow.string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.utf8.html">pyarrow.utf8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_binary.html">pyarrow.large_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_string.html">pyarrow.large_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_utf8.html">pyarrow.large_utf8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.binary_view.html">pyarrow.binary_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.string_view.html">pyarrow.string_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.decimal128.html">pyarrow.decimal128</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.list_.html">pyarrow.list_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_list.html">pyarrow.large_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.list_view.html">pyarrow.list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_list_view.html">pyarrow.large_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.map_.html">pyarrow.map_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.struct.html">pyarrow.struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dictionary.html">pyarrow.dictionary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.run_end_encoded.html">pyarrow.run_end_encoded</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.field.html">pyarrow.field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.schema.html">pyarrow.schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.from_numpy_dtype.html">pyarrow.from_numpy_dtype</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.unify_schemas.html">pyarrow.unify_schemas</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DataType.html">pyarrow.DataType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryType.html">pyarrow.DictionaryType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListType.html">pyarrow.ListType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapType.html">pyarrow.MapType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructType.html">pyarrow.StructType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionType.html">pyarrow.UnionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampType.html">pyarrow.TimestampType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Type.html">pyarrow.Time32Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Type.html">pyarrow.Time64Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryType.html">pyarrow.FixedSizeBinaryType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Type.html">pyarrow.Decimal128Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Field.html">pyarrow.Field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Schema.html">pyarrow.Schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedType.html">pyarrow.RunEndEncodedType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionType.html">pyarrow.ExtensionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.PyExtensionType.html">pyarrow.PyExtensionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.register_extension_type.html">pyarrow.register_extension_type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.unregister_extension_type.html">pyarrow.unregister_extension_type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_boolean.html">pyarrow.types.is_boolean</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_integer.html">pyarrow.types.is_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_signed_integer.html">pyarrow.types.is_signed_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_unsigned_integer.html">pyarrow.types.is_unsigned_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int8.html">pyarrow.types.is_int8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int16.html">pyarrow.types.is_int16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int32.html">pyarrow.types.is_int32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int64.html">pyarrow.types.is_int64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint8.html">pyarrow.types.is_uint8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint16.html">pyarrow.types.is_uint16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint32.html">pyarrow.types.is_uint32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint64.html">pyarrow.types.is_uint64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_floating.html">pyarrow.types.is_floating</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float16.html">pyarrow.types.is_float16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float32.html">pyarrow.types.is_float32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float64.html">pyarrow.types.is_float64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal.html">pyarrow.types.is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal128.html">pyarrow.types.is_decimal128</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal256.html">pyarrow.types.is_decimal256</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_list.html">pyarrow.types.is_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_list.html">pyarrow.types.is_large_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_fixed_size_list.html">pyarrow.types.is_fixed_size_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_list_view.html">pyarrow.types.is_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_list_view.html">pyarrow.types.is_large_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_struct.html">pyarrow.types.is_struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_union.html">pyarrow.types.is_union</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_nested.html">pyarrow.types.is_nested</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_run_end_encoded.html">pyarrow.types.is_run_end_encoded</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_temporal.html">pyarrow.types.is_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_timestamp.html">pyarrow.types.is_timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date.html">pyarrow.types.is_date</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date32.html">pyarrow.types.is_date32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date64.html">pyarrow.types.is_date64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time.html">pyarrow.types.is_time</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time32.html">pyarrow.types.is_time32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time64.html">pyarrow.types.is_time64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_duration.html">pyarrow.types.is_duration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_interval.html">pyarrow.types.is_interval</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_null.html">pyarrow.types.is_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_binary.html">pyarrow.types.is_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_unicode.html">pyarrow.types.is_unicode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_string.html">pyarrow.types.is_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_binary.html">pyarrow.types.is_large_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_unicode.html">pyarrow.types.is_large_unicode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_string.html">pyarrow.types.is_large_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_binary_view.html">pyarrow.types.is_binary_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_string_view.html">pyarrow.types.is_string_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_fixed_size_binary.html">pyarrow.types.is_fixed_size_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_map.html">pyarrow.types.is_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_dictionary.html">pyarrow.types.is_dictionary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_primitive.html">pyarrow.types.is_primitive</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/arrays.html">Arrays and Scalars</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.array.html">pyarrow.array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.nulls.html">pyarrow.nulls</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Array.html">pyarrow.Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BooleanArray.html">pyarrow.BooleanArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatingPointArray.html">pyarrow.FloatingPointArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.IntegerArray.html">pyarrow.IntegerArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int8Array.html">pyarrow.Int8Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int16Array.html">pyarrow.Int16Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int32Array.html">pyarrow.Int32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int64Array.html">pyarrow.Int64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NullArray.html">pyarrow.NullArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NumericArray.html">pyarrow.NumericArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt8Array.html">pyarrow.UInt8Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt16Array.html">pyarrow.UInt16Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt32Array.html">pyarrow.UInt32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt64Array.html">pyarrow.UInt64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.HalfFloatArray.html">pyarrow.HalfFloatArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatArray.html">pyarrow.FloatArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DoubleArray.html">pyarrow.DoubleArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryArray.html">pyarrow.BinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringArray.html">pyarrow.StringArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryArray.html">pyarrow.FixedSizeBinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeBinaryArray.html">pyarrow.LargeBinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeStringArray.html">pyarrow.LargeStringArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Array.html">pyarrow.Time32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Array.html">pyarrow.Time64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date32Array.html">pyarrow.Date32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date64Array.html">pyarrow.Date64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampArray.html">pyarrow.TimestampArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DurationArray.html">pyarrow.DurationArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MonthDayNanoIntervalArray.html">pyarrow.MonthDayNanoIntervalArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Array.html">pyarrow.Decimal128Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryArray.html">pyarrow.DictionaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListArray.html">pyarrow.ListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeListArray.html">pyarrow.FixedSizeListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListArray.html">pyarrow.LargeListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListViewArray.html">pyarrow.ListViewArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListViewArray.html">pyarrow.LargeListViewArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapArray.html">pyarrow.MapArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedArray.html">pyarrow.RunEndEncodedArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructArray.html">pyarrow.StructArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionArray.html">pyarrow.UnionArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionArray.html">pyarrow.ExtensionArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedShapeTensorArray.html">pyarrow.FixedShapeTensorArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.scalar.html">pyarrow.scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NA.html">pyarrow.NA</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Scalar.html">pyarrow.Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BooleanScalar.html">pyarrow.BooleanScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int8Scalar.html">pyarrow.Int8Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int16Scalar.html">pyarrow.Int16Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int32Scalar.html">pyarrow.Int32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int64Scalar.html">pyarrow.Int64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt8Scalar.html">pyarrow.UInt8Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt16Scalar.html">pyarrow.UInt16Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt32Scalar.html">pyarrow.UInt32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt64Scalar.html">pyarrow.UInt64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.HalfFloatScalar.html">pyarrow.HalfFloatScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatScalar.html">pyarrow.FloatScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DoubleScalar.html">pyarrow.DoubleScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryScalar.html">pyarrow.BinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringScalar.html">pyarrow.StringScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryScalar.html">pyarrow.FixedSizeBinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeBinaryScalar.html">pyarrow.LargeBinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeStringScalar.html">pyarrow.LargeStringScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryViewScalar.html">pyarrow.BinaryViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringViewScalar.html">pyarrow.StringViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Scalar.html">pyarrow.Time32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Scalar.html">pyarrow.Time64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date32Scalar.html">pyarrow.Date32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date64Scalar.html">pyarrow.Date64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampScalar.html">pyarrow.TimestampScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DurationScalar.html">pyarrow.DurationScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MonthDayNanoIntervalScalar.html">pyarrow.MonthDayNanoIntervalScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Scalar.html">pyarrow.Decimal128Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryScalar.html">pyarrow.DictionaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedScalar.html">pyarrow.RunEndEncodedScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListScalar.html">pyarrow.ListScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListScalar.html">pyarrow.LargeListScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListViewScalar.html">pyarrow.ListViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListViewScalar.html">pyarrow.LargeListViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapScalar.html">pyarrow.MapScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructScalar.html">pyarrow.StructScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionScalar.html">pyarrow.UnionScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionScalar.html">pyarrow.ExtensionScalar</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/memory.html">Buffers and Memory</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.allocate_buffer.html">pyarrow.allocate_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.py_buffer.html">pyarrow.py_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.foreign_buffer.html">pyarrow.foreign_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Buffer.html">pyarrow.Buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ResizableBuffer.html">pyarrow.ResizableBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Codec.html">pyarrow.Codec</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compress.html">pyarrow.compress</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.decompress.html">pyarrow.decompress</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MemoryPool.html">pyarrow.MemoryPool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.default_memory_pool.html">pyarrow.default_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.jemalloc_memory_pool.html">pyarrow.jemalloc_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.mimalloc_memory_pool.html">pyarrow.mimalloc_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.system_memory_pool.html">pyarrow.system_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.jemalloc_set_decay_ms.html">pyarrow.jemalloc_set_decay_ms</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_memory_pool.html">pyarrow.set_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.log_memory_allocations.html">pyarrow.log_memory_allocations</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.total_allocated_bytes.html">pyarrow.total_allocated_bytes</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/tables.html">Tables and Tensors</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.chunked_array.html">pyarrow.chunked_array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.concat_arrays.html">pyarrow.concat_arrays</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.concat_tables.html">pyarrow.concat_tables</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.record_batch.html">pyarrow.record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.table.html">pyarrow.table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ChunkedArray.html">pyarrow.ChunkedArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RecordBatch.html">pyarrow.RecordBatch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Table.html">pyarrow.Table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TableGroupBy.html">pyarrow.TableGroupBy</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RecordBatchReader.html">pyarrow.RecordBatchReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.interchange.from_dataframe.html">pyarrow.interchange.from_dataframe</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Tensor.html">pyarrow.Tensor</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/compute.html">Compute Functions</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-7"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.all.html">pyarrow.compute.all</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.any.html">pyarrow.compute.any</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.approximate_median.html">pyarrow.compute.approximate_median</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count.html">pyarrow.compute.count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_distinct.html">pyarrow.compute.count_distinct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.index.html">pyarrow.compute.index</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.max.html">pyarrow.compute.max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.mean.html">pyarrow.compute.mean</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min.html">pyarrow.compute.min</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min_max.html">pyarrow.compute.min_max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.mode.html">pyarrow.compute.mode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.product.html">pyarrow.compute.product</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quantile.html">pyarrow.compute.quantile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.stddev.html">pyarrow.compute.stddev</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sum.html">pyarrow.compute.sum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tdigest.html">pyarrow.compute.tdigest</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.variance.html">pyarrow.compute.variance</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_sum.html">pyarrow.compute.cumulative_sum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_sum_checked.html">pyarrow.compute.cumulative_sum_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_prod.html">pyarrow.compute.cumulative_prod</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_prod_checked.html">pyarrow.compute.cumulative_prod_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_max.html">pyarrow.compute.cumulative_max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_min.html">pyarrow.compute.cumulative_min</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.abs.html">pyarrow.compute.abs</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.abs_checked.html">pyarrow.compute.abs_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.add.html">pyarrow.compute.add</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.add_checked.html">pyarrow.compute.add_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.divide.html">pyarrow.compute.divide</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.divide_checked.html">pyarrow.compute.divide_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.multiply.html">pyarrow.compute.multiply</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.multiply_checked.html">pyarrow.compute.multiply_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.negate.html">pyarrow.compute.negate</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.negate_checked.html">pyarrow.compute.negate_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.power.html">pyarrow.compute.power</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.power_checked.html">pyarrow.compute.power_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sign.html">pyarrow.compute.sign</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sqrt.html">pyarrow.compute.sqrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sqrt_checked.html">pyarrow.compute.sqrt_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subtract.html">pyarrow.compute.subtract</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subtract_checked.html">pyarrow.compute.subtract_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_and.html">pyarrow.compute.bit_wise_and</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_not.html">pyarrow.compute.bit_wise_not</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_or.html">pyarrow.compute.bit_wise_or</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_xor.html">pyarrow.compute.bit_wise_xor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_left.html">pyarrow.compute.shift_left</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_left_checked.html">pyarrow.compute.shift_left_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_right.html">pyarrow.compute.shift_right</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_right_checked.html">pyarrow.compute.shift_right_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ceil.html">pyarrow.compute.ceil</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.floor.html">pyarrow.compute.floor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round.html">pyarrow.compute.round</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round_to_multiple.html">pyarrow.compute.round_to_multiple</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.trunc.html">pyarrow.compute.trunc</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ln.html">pyarrow.compute.ln</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ln_checked.html">pyarrow.compute.ln_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log10.html">pyarrow.compute.log10</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log10_checked.html">pyarrow.compute.log10_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log1p.html">pyarrow.compute.log1p</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log1p_checked.html">pyarrow.compute.log1p_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log2.html">pyarrow.compute.log2</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log2_checked.html">pyarrow.compute.log2_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.logb.html">pyarrow.compute.logb</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.logb_checked.html">pyarrow.compute.logb_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.acos.html">pyarrow.compute.acos</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.acos_checked.html">pyarrow.compute.acos_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.asin.html">pyarrow.compute.asin</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.asin_checked.html">pyarrow.compute.asin_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.atan.html">pyarrow.compute.atan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.atan2.html">pyarrow.compute.atan2</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cos.html">pyarrow.compute.cos</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cos_checked.html">pyarrow.compute.cos_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sin.html">pyarrow.compute.sin</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sin_checked.html">pyarrow.compute.sin_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tan.html">pyarrow.compute.tan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tan_checked.html">pyarrow.compute.tan_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.equal.html">pyarrow.compute.equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.greater.html">pyarrow.compute.greater</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.greater_equal.html">pyarrow.compute.greater_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.less.html">pyarrow.compute.less</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.less_equal.html">pyarrow.compute.less_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.not_equal.html">pyarrow.compute.not_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.max_element_wise.html">pyarrow.compute.max_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min_element_wise.html">pyarrow.compute.min_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_.html">pyarrow.compute.and_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_kleene.html">pyarrow.compute.and_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_not.html">pyarrow.compute.and_not</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_not_kleene.html">pyarrow.compute.and_not_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.invert.html">pyarrow.compute.invert</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.or_.html">pyarrow.compute.or_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.or_kleene.html">pyarrow.compute.or_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.xor.html">pyarrow.compute.xor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_alnum.html">pyarrow.compute.ascii_is_alnum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_alpha.html">pyarrow.compute.ascii_is_alpha</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_decimal.html">pyarrow.compute.ascii_is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_lower.html">pyarrow.compute.ascii_is_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_printable.html">pyarrow.compute.ascii_is_printable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_space.html">pyarrow.compute.ascii_is_space</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_upper.html">pyarrow.compute.ascii_is_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_alnum.html">pyarrow.compute.utf8_is_alnum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_alpha.html">pyarrow.compute.utf8_is_alpha</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_decimal.html">pyarrow.compute.utf8_is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_digit.html">pyarrow.compute.utf8_is_digit</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_lower.html">pyarrow.compute.utf8_is_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_numeric.html">pyarrow.compute.utf8_is_numeric</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_printable.html">pyarrow.compute.utf8_is_printable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_space.html">pyarrow.compute.utf8_is_space</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_upper.html">pyarrow.compute.utf8_is_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_title.html">pyarrow.compute.ascii_is_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_title.html">pyarrow.compute.utf8_is_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.string_is_ascii.html">pyarrow.compute.string_is_ascii</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_capitalize.html">pyarrow.compute.ascii_capitalize</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_lower.html">pyarrow.compute.ascii_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_reverse.html">pyarrow.compute.ascii_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_swapcase.html">pyarrow.compute.ascii_swapcase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_title.html">pyarrow.compute.ascii_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_upper.html">pyarrow.compute.ascii_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_length.html">pyarrow.compute.binary_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_repeat.html">pyarrow.compute.binary_repeat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_replace_slice.html">pyarrow.compute.binary_replace_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_reverse.html">pyarrow.compute.binary_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_substring.html">pyarrow.compute.replace_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_substring_regex.html">pyarrow.compute.replace_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_capitalize.html">pyarrow.compute.utf8_capitalize</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_length.html">pyarrow.compute.utf8_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_lower.html">pyarrow.compute.utf8_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_replace_slice.html">pyarrow.compute.utf8_replace_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_reverse.html">pyarrow.compute.utf8_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_swapcase.html">pyarrow.compute.utf8_swapcase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_title.html">pyarrow.compute.utf8_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_upper.html">pyarrow.compute.utf8_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_center.html">pyarrow.compute.ascii_center</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_lpad.html">pyarrow.compute.ascii_lpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rpad.html">pyarrow.compute.ascii_rpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_center.html">pyarrow.compute.utf8_center</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_lpad.html">pyarrow.compute.utf8_lpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rpad.html">pyarrow.compute.utf8_rpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_ltrim.html">pyarrow.compute.ascii_ltrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_ltrim_whitespace.html">pyarrow.compute.ascii_ltrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rtrim.html">pyarrow.compute.ascii_rtrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rtrim_whitespace.html">pyarrow.compute.ascii_rtrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_trim.html">pyarrow.compute.ascii_trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_trim_whitespace.html">pyarrow.compute.ascii_trim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_ltrim.html">pyarrow.compute.utf8_ltrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_ltrim_whitespace.html">pyarrow.compute.utf8_ltrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rtrim.html">pyarrow.compute.utf8_rtrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rtrim_whitespace.html">pyarrow.compute.utf8_rtrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_trim.html">pyarrow.compute.utf8_trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_trim_whitespace.html">pyarrow.compute.utf8_trim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_split_whitespace.html">pyarrow.compute.ascii_split_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.split_pattern.html">pyarrow.compute.split_pattern</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.split_pattern_regex.html">pyarrow.compute.split_pattern_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_split_whitespace.html">pyarrow.compute.utf8_split_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.extract_regex.html">pyarrow.compute.extract_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_join.html">pyarrow.compute.binary_join</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_join_element_wise.html">pyarrow.compute.binary_join_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_slice.html">pyarrow.compute.binary_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_slice_codeunits.html">pyarrow.compute.utf8_slice_codeunits</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_substring.html">pyarrow.compute.count_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_substring_regex.html">pyarrow.compute.count_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ends_with.html">pyarrow.compute.ends_with</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.find_substring.html">pyarrow.compute.find_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.find_substring_regex.html">pyarrow.compute.find_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.index_in.html">pyarrow.compute.index_in</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_in.html">pyarrow.compute.is_in</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_like.html">pyarrow.compute.match_like</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_substring.html">pyarrow.compute.match_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_substring_regex.html">pyarrow.compute.match_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.starts_with.html">pyarrow.compute.starts_with</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.indices_nonzero.html">pyarrow.compute.indices_nonzero</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_finite.html">pyarrow.compute.is_finite</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_inf.html">pyarrow.compute.is_inf</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_nan.html">pyarrow.compute.is_nan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_null.html">pyarrow.compute.is_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_valid.html">pyarrow.compute.is_valid</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.true_unless_null.html">pyarrow.compute.true_unless_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.case_when.html">pyarrow.compute.case_when</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.choose.html">pyarrow.compute.choose</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.coalesce.html">pyarrow.compute.coalesce</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.if_else.html">pyarrow.compute.if_else</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cast.html">pyarrow.compute.cast</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ceil_temporal.html">pyarrow.compute.ceil_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.floor_temporal.html">pyarrow.compute.floor_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round_temporal.html">pyarrow.compute.round_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.run_end_decode.html">pyarrow.compute.run_end_decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.run_end_encode.html">pyarrow.compute.run_end_encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.strftime.html">pyarrow.compute.strftime</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.strptime.html">pyarrow.compute.strptime</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day.html">pyarrow.compute.day</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_of_week.html">pyarrow.compute.day_of_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_of_year.html">pyarrow.compute.day_of_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.hour.html">pyarrow.compute.hour</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_week.html">pyarrow.compute.iso_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_year.html">pyarrow.compute.iso_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_calendar.html">pyarrow.compute.iso_calendar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_leap_year.html">pyarrow.compute.is_leap_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.microsecond.html">pyarrow.compute.microsecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.millisecond.html">pyarrow.compute.millisecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.minute.html">pyarrow.compute.minute</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month.html">pyarrow.compute.month</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.nanosecond.html">pyarrow.compute.nanosecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quarter.html">pyarrow.compute.quarter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.second.html">pyarrow.compute.second</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subsecond.html">pyarrow.compute.subsecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.us_week.html">pyarrow.compute.us_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.us_year.html">pyarrow.compute.us_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.week.html">pyarrow.compute.week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.year.html">pyarrow.compute.year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.year_month_day.html">pyarrow.compute.year_month_day</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_time_interval_between.html">pyarrow.compute.day_time_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.days_between.html">pyarrow.compute.days_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.hours_between.html">pyarrow.compute.hours_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.microseconds_between.html">pyarrow.compute.microseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.milliseconds_between.html">pyarrow.compute.milliseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.minutes_between.html">pyarrow.compute.minutes_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month_day_nano_interval_between.html">pyarrow.compute.month_day_nano_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month_interval_between.html">pyarrow.compute.month_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.nanoseconds_between.html">pyarrow.compute.nanoseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quarters_between.html">pyarrow.compute.quarters_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.seconds_between.html">pyarrow.compute.seconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.weeks_between.html">pyarrow.compute.weeks_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.years_between.html">pyarrow.compute.years_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.assume_timezone.html">pyarrow.compute.assume_timezone</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.local_timestamp.html">pyarrow.compute.local_timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.dictionary_encode.html">pyarrow.compute.dictionary_encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.unique.html">pyarrow.compute.unique</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.value_counts.html">pyarrow.compute.value_counts</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_filter.html">pyarrow.compute.array_filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_take.html">pyarrow.compute.array_take</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.drop_null.html">pyarrow.compute.drop_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.filter.html">pyarrow.compute.filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.take.html">pyarrow.compute.take</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_sort_indices.html">pyarrow.compute.array_sort_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.partition_nth_indices.html">pyarrow.compute.partition_nth_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.select_k_unstable.html">pyarrow.compute.select_k_unstable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sort_indices.html">pyarrow.compute.sort_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null.html">pyarrow.compute.fill_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null_backward.html">pyarrow.compute.fill_null_backward</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null_forward.html">pyarrow.compute.fill_null_forward</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_element.html">pyarrow.compute.list_element</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_flatten.html">pyarrow.compute.list_flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_parent_indices.html">pyarrow.compute.list_parent_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_slice.html">pyarrow.compute.list_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_value_length.html">pyarrow.compute.list_value_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.make_struct.html">pyarrow.compute.make_struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.map_lookup.html">pyarrow.compute.map_lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_with_mask.html">pyarrow.compute.replace_with_mask</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.struct_field.html">pyarrow.compute.struct_field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.pairwise_diff.html">pyarrow.compute.pairwise_diff</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ArraySortOptions.html">pyarrow.compute.ArraySortOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.AssumeTimezoneOptions.html">pyarrow.compute.AssumeTimezoneOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CastOptions.html">pyarrow.compute.CastOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CountOptions.html">pyarrow.compute.CountOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CountOptions.html">pyarrow.compute.CountOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CumulativeSumOptions.html">pyarrow.compute.CumulativeSumOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.DayOfWeekOptions.html">pyarrow.compute.DayOfWeekOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.DictionaryEncodeOptions.html">pyarrow.compute.DictionaryEncodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ElementWiseAggregateOptions.html">pyarrow.compute.ElementWiseAggregateOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ExtractRegexOptions.html">pyarrow.compute.ExtractRegexOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.FilterOptions.html">pyarrow.compute.FilterOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.IndexOptions.html">pyarrow.compute.IndexOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.JoinOptions.html">pyarrow.compute.JoinOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ListSliceOptions.html">pyarrow.compute.ListSliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MakeStructOptions.html">pyarrow.compute.MakeStructOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MapLookupOptions.html">pyarrow.compute.MapLookupOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MatchSubstringOptions.html">pyarrow.compute.MatchSubstringOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ModeOptions.html">pyarrow.compute.ModeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.NullOptions.html">pyarrow.compute.NullOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PadOptions.html">pyarrow.compute.PadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PairwiseOptions.html">pyarrow.compute.PairwiseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PartitionNthOptions.html">pyarrow.compute.PartitionNthOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.QuantileOptions.html">pyarrow.compute.QuantileOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ReplaceSliceOptions.html">pyarrow.compute.ReplaceSliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ReplaceSubstringOptions.html">pyarrow.compute.ReplaceSubstringOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundOptions.html">pyarrow.compute.RoundOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundTemporalOptions.html">pyarrow.compute.RoundTemporalOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundToMultipleOptions.html">pyarrow.compute.RoundToMultipleOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RunEndEncodeOptions.html">pyarrow.compute.RunEndEncodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ScalarAggregateOptions.html">pyarrow.compute.ScalarAggregateOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ScalarAggregateOptions.html">pyarrow.compute.ScalarAggregateOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SelectKOptions.html">pyarrow.compute.SelectKOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SetLookupOptions.html">pyarrow.compute.SetLookupOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SliceOptions.html">pyarrow.compute.SliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SortOptions.html">pyarrow.compute.SortOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SplitOptions.html">pyarrow.compute.SplitOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SplitPatternOptions.html">pyarrow.compute.SplitPatternOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StrftimeOptions.html">pyarrow.compute.StrftimeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StrptimeOptions.html">pyarrow.compute.StrptimeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StructFieldOptions.html">pyarrow.compute.StructFieldOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TakeOptions.html">pyarrow.compute.TakeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TDigestOptions.html">pyarrow.compute.TDigestOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TDigestOptions.html">pyarrow.compute.TDigestOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TrimOptions.html">pyarrow.compute.TrimOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.VarianceOptions.html">pyarrow.compute.VarianceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.WeekOptions.html">pyarrow.compute.WeekOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.register_scalar_function.html">pyarrow.compute.register_scalar_function</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.UdfContext.html">pyarrow.compute.UdfContext</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/acero.html">Acero - Streaming Execution Engine</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-8"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.Declaration.html">pyarrow.acero.Declaration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ExecNodeOptions.html">pyarrow.acero.ExecNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.TableSourceNodeOptions.html">pyarrow.acero.TableSourceNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ScanNodeOptions.html">pyarrow.acero.ScanNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.FilterNodeOptions.html">pyarrow.acero.FilterNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ProjectNodeOptions.html">pyarrow.acero.ProjectNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.AggregateNodeOptions.html">pyarrow.acero.AggregateNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.OrderByNodeOptions.html">pyarrow.acero.OrderByNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.HashJoinNodeOptions.html">pyarrow.acero.HashJoinNodeOptions</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/substrait.html">Substrait</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-9"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.run_query.html">pyarrow.substrait.run_query</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.BoundExpressions.html">pyarrow.substrait.BoundExpressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.deserialize_expressions.html">pyarrow.substrait.deserialize_expressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.serialize_expressions.html">pyarrow.substrait.serialize_expressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.get_supported_functions.html">pyarrow.substrait.get_supported_functions</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/files.html">Streams and File Access</a><input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-10"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.input_stream.html">pyarrow.input_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.output_stream.html">pyarrow.output_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.memory_map.html">pyarrow.memory_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.create_memory_map.html">pyarrow.create_memory_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NativeFile.html">pyarrow.NativeFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.OSFile.html">pyarrow.OSFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.PythonFile.html">pyarrow.PythonFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BufferReader.html">pyarrow.BufferReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BufferOutputStream.html">pyarrow.BufferOutputStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBufferWriter.html">pyarrow.FixedSizeBufferWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MemoryMappedFile.html">pyarrow.MemoryMappedFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.CompressedInputStream.html">pyarrow.CompressedInputStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.CompressedOutputStream.html">pyarrow.CompressedOutputStream</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/ipc.html">Serialization and IPC</a><input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-11"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.new_file.html">pyarrow.ipc.new_file</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.open_file.html">pyarrow.ipc.open_file</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.new_stream.html">pyarrow.ipc.new_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.open_stream.html">pyarrow.ipc.open_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_message.html">pyarrow.ipc.read_message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_record_batch.html">pyarrow.ipc.read_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.get_record_batch_size.html">pyarrow.ipc.get_record_batch_size</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_tensor.html">pyarrow.ipc.read_tensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.write_tensor.html">pyarrow.ipc.write_tensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.get_tensor_size.html">pyarrow.ipc.get_tensor_size</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.IpcReadOptions.html">pyarrow.ipc.IpcReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.IpcWriteOptions.html">pyarrow.ipc.IpcWriteOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.Message.html">pyarrow.ipc.Message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.MessageReader.html">pyarrow.ipc.MessageReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchFileReader.html">pyarrow.ipc.RecordBatchFileReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchFileWriter.html">pyarrow.ipc.RecordBatchFileWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchStreamReader.html">pyarrow.ipc.RecordBatchStreamReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchStreamWriter.html">pyarrow.ipc.RecordBatchStreamWriter</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/flight.html">Arrow Flight</a><input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-12"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Action.html">pyarrow.flight.Action</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ActionType.html">pyarrow.flight.ActionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.DescriptorType.html">pyarrow.flight.DescriptorType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightDescriptor.html">pyarrow.flight.FlightDescriptor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightEndpoint.html">pyarrow.flight.FlightEndpoint</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightInfo.html">pyarrow.flight.FlightInfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Location.html">pyarrow.flight.Location</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.MetadataRecordBatchReader.html">pyarrow.flight.MetadataRecordBatchReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.MetadataRecordBatchWriter.html">pyarrow.flight.MetadataRecordBatchWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Ticket.html">pyarrow.flight.Ticket</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Result.html">pyarrow.flight.Result</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.connect.html">pyarrow.flight.connect</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightCallOptions.html">pyarrow.flight.FlightCallOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightClient.html">pyarrow.flight.FlightClient</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightStreamReader.html">pyarrow.flight.FlightStreamReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightStreamWriter.html">pyarrow.flight.FlightStreamWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientMiddlewareFactory.html">pyarrow.flight.ClientMiddlewareFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientMiddleware.html">pyarrow.flight.ClientMiddleware</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightDataStream.html">pyarrow.flight.FlightDataStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightMetadataWriter.html">pyarrow.flight.FlightMetadataWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightServerBase.html">pyarrow.flight.FlightServerBase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.GeneratorStream.html">pyarrow.flight.GeneratorStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.RecordBatchStream.html">pyarrow.flight.RecordBatchStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerCallContext.html">pyarrow.flight.ServerCallContext</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerMiddlewareFactory.html">pyarrow.flight.ServerMiddlewareFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerMiddleware.html">pyarrow.flight.ServerMiddleware</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientAuthHandler.html">pyarrow.flight.ClientAuthHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerAuthHandler.html">pyarrow.flight.ServerAuthHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightError.html">pyarrow.flight.FlightError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightCancelledError.html">pyarrow.flight.FlightCancelledError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightInternalError.html">pyarrow.flight.FlightInternalError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightServerError.html">pyarrow.flight.FlightServerError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightTimedOutError.html">pyarrow.flight.FlightTimedOutError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnauthenticatedError.html">pyarrow.flight.FlightUnauthenticatedError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnauthorizedError.html">pyarrow.flight.FlightUnauthorizedError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnavailableError.html">pyarrow.flight.FlightUnavailableError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightWriteSizeExceededError.html">pyarrow.flight.FlightWriteSizeExceededError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightMethod.html">pyarrow.flight.FlightMethod</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.CallInfo.html">pyarrow.flight.CallInfo</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/formats.html">Tabular File Formats</a><input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-13"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ConvertOptions.html">pyarrow.csv.ConvertOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.CSVStreamingReader.html">pyarrow.csv.CSVStreamingReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.CSVWriter.html">pyarrow.csv.CSVWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ISO8601.html">pyarrow.csv.ISO8601</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ParseOptions.html">pyarrow.csv.ParseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ReadOptions.html">pyarrow.csv.ReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.WriteOptions.html">pyarrow.csv.WriteOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.open_csv.html">pyarrow.csv.open_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.read_csv.html">pyarrow.csv.read_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.write_csv.html">pyarrow.csv.write_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.InvalidRow.html">pyarrow.csv.InvalidRow</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.read_feather.html">pyarrow.feather.read_feather</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.read_table.html">pyarrow.feather.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.write_feather.html">pyarrow.feather.write_feather</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.ReadOptions.html">pyarrow.json.ReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.ParseOptions.html">pyarrow.json.ParseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.read_json.html">pyarrow.json.read_json</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetDataset.html">pyarrow.parquet.ParquetDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetFile.html">pyarrow.parquet.ParquetFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetWriter.html">pyarrow.parquet.ParquetWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_table.html">pyarrow.parquet.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_metadata.html">pyarrow.parquet.read_metadata</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_pandas.html">pyarrow.parquet.read_pandas</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_schema.html">pyarrow.parquet.read_schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_metadata.html">pyarrow.parquet.write_metadata</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_table.html">pyarrow.parquet.write_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_to_dataset.html">pyarrow.parquet.write_to_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.FileMetaData.html">pyarrow.parquet.FileMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.RowGroupMetaData.html">pyarrow.parquet.RowGroupMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.SortingColumn.html">pyarrow.parquet.SortingColumn</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ColumnChunkMetaData.html">pyarrow.parquet.ColumnChunkMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.Statistics.html">pyarrow.parquet.Statistics</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetSchema.html">pyarrow.parquet.ParquetSchema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ColumnSchema.html">pyarrow.parquet.ColumnSchema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetLogicalType.html">pyarrow.parquet.ParquetLogicalType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.CryptoFactory.html">pyarrow.parquet.encryption.CryptoFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsClient.html">pyarrow.parquet.encryption.KmsClient</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsConnectionConfig.html">pyarrow.parquet.encryption.KmsConnectionConfig</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.EncryptionConfiguration.html">pyarrow.parquet.encryption.EncryptionConfiguration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.DecryptionConfiguration.html">pyarrow.parquet.encryption.DecryptionConfiguration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.ORCFile.html">pyarrow.orc.ORCFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.ORCWriter.html">pyarrow.orc.ORCWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.read_table.html">pyarrow.orc.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.write_table.html">pyarrow.orc.write_table</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/filesystems.html">Filesystems</a><input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-14"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileInfo.html">pyarrow.fs.FileInfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSelector.html">pyarrow.fs.FileSelector</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSystem.html">pyarrow.fs.FileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.LocalFileSystem.html">pyarrow.fs.LocalFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.S3FileSystem.html">pyarrow.fs.S3FileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.GcsFileSystem.html">pyarrow.fs.GcsFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.HadoopFileSystem.html">pyarrow.fs.HadoopFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.SubTreeFileSystem.html">pyarrow.fs.SubTreeFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.PyFileSystem.html">pyarrow.fs.PyFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSystemHandler.html">pyarrow.fs.FileSystemHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FSSpecHandler.html">pyarrow.fs.FSSpecHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.copy_files.html">pyarrow.fs.copy_files</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.initialize_s3.html">pyarrow.fs.initialize_s3</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.finalize_s3.html">pyarrow.fs.finalize_s3</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.resolve_s3_region.html">pyarrow.fs.resolve_s3_region</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.S3LogLevel.html">pyarrow.fs.S3LogLevel</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/dataset.html">Dataset</a><input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-15"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.dataset.html">pyarrow.dataset.dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.parquet_dataset.html">pyarrow.dataset.parquet_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.partitioning.html">pyarrow.dataset.partitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.field.html">pyarrow.dataset.field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.scalar.html">pyarrow.dataset.scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html">pyarrow.dataset.write_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileFormat.html">pyarrow.dataset.FileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.CsvFileFormat.html">pyarrow.dataset.CsvFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.CsvFragmentScanOptions.html">pyarrow.dataset.CsvFragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.IpcFileFormat.html">pyarrow.dataset.IpcFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.JsonFileFormat.html">pyarrow.dataset.JsonFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFileFormat.html">pyarrow.dataset.ParquetFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetReadOptions.html">pyarrow.dataset.ParquetReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFragmentScanOptions.html">pyarrow.dataset.ParquetFragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFileFragment.html">pyarrow.dataset.ParquetFileFragment</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.OrcFileFormat.html">pyarrow.dataset.OrcFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Partitioning.html">pyarrow.dataset.Partitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.PartitioningFactory.html">pyarrow.dataset.PartitioningFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.DirectoryPartitioning.html">pyarrow.dataset.DirectoryPartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.HivePartitioning.html">pyarrow.dataset.HivePartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FilenamePartitioning.html">pyarrow.dataset.FilenamePartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Dataset.html">pyarrow.dataset.Dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemDataset.html">pyarrow.dataset.FileSystemDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemFactoryOptions.html">pyarrow.dataset.FileSystemFactoryOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemDatasetFactory.html">pyarrow.dataset.FileSystemDatasetFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.UnionDataset.html">pyarrow.dataset.UnionDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Fragment.html">pyarrow.dataset.Fragment</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FragmentScanOptions.html">pyarrow.dataset.FragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.TaggedRecordBatch.html">pyarrow.dataset.TaggedRecordBatch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Scanner.html">pyarrow.dataset.Scanner</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Expression.html">pyarrow.dataset.Expression</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.InMemoryDataset.html">pyarrow.dataset.InMemoryDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.WrittenFile.html">pyarrow.dataset.WrittenFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.get_partition_keys.html">pyarrow.dataset.get_partition_keys</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/cuda.html">CUDA Integration</a><input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-16"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.Context.html">pyarrow.cuda.Context</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.CudaBuffer.html">pyarrow.cuda.CudaBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.new_host_buffer.html">pyarrow.cuda.new_host_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.HostBuffer.html">pyarrow.cuda.HostBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.BufferReader.html">pyarrow.cuda.BufferReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.BufferWriter.html">pyarrow.cuda.BufferWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.serialize_record_batch.html">pyarrow.cuda.serialize_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.read_record_batch.html">pyarrow.cuda.read_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.read_message.html">pyarrow.cuda.read_message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.IpcMemHandle.html">pyarrow.cuda.IpcMemHandle</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/misc.html">Miscellaneous</a><input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-17"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cpu_count.html">pyarrow.cpu_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_cpu_count.html">pyarrow.set_cpu_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.io_thread_count.html">pyarrow.io_thread_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_io_thread_count.html">pyarrow.set_io_thread_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_include.html">pyarrow.get_include</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_libraries.html">pyarrow.get_libraries</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_library_dirs.html">pyarrow.get_library_dirs</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="getting_involved.html">Getting Involved</a></li>
<li class="toctree-l1"><a class="reference internal" href="benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/py/">Python cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Python</a></li>
<li class="breadcrumb-item active" aria-current="page">Tabular Datasets</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="tabular-datasets">
<span id="dataset"></span><h1>Tabular Datasets<a class="headerlink" href="#tabular-datasets" title="Permalink to this heading">#</a></h1>
<p>The <code class="docutils literal notranslate"><span class="pre">pyarrow.dataset</span></code> module provides functionality to efficiently work with
tabular, potentially larger than memory, and multi-file datasets. This includes:</p>
<ul class="simple">
<li><p>A unified interface that supports different sources and file formats and
different file systems (local, cloud).</p></li>
<li><p>Discovery of sources (crawling directories, handle directory-based partitioned
datasets, basic schema normalization, ..)</p></li>
<li><p>Optimized reading with predicate pushdown (filtering rows), projection
(selecting and deriving columns), and optionally parallel reading.</p></li>
</ul>
<p>The supported file formats currently are Parquet, Feather / Arrow IPC, CSV and
ORC (note that ORC datasets can currently only be read and not yet written).
The goal is to expand support to other file formats and data sources
(e.g. database connections) in the future.</p>
<p>For those familiar with the existing <a class="reference internal" href="generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset" title="pyarrow.parquet.ParquetDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.ParquetDataset</span></code></a> for
reading Parquet datasets: <code class="docutils literal notranslate"><span class="pre">pyarrow.dataset</span></code>’s goal is similar but not specific
to the Parquet format and not tied to Python: the same datasets API is exposed
in the R bindings or Arrow. In addition <code class="docutils literal notranslate"><span class="pre">pyarrow.dataset</span></code> boasts improved
performance and new features (e.g. filtering within files rather than only on
partition keys).</p>
<section id="reading-datasets">
<h2>Reading Datasets<a class="headerlink" href="#reading-datasets" title="Permalink to this heading">#</a></h2>
<p>For the examples below, let’s create a small dataset consisting
of a directory with two parquet files:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [1]: </span><span class="kn">import</span> <span class="nn">tempfile</span>
<span class="gp">In [2]: </span><span class="kn">import</span> <span class="nn">pathlib</span>
<span class="gp">In [3]: </span><span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="nn">pa</span>
<span class="gp">In [4]: </span><span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">pq</span>
<span class="gp">In [5]: </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="gp">In [6]: </span><span class="n">base</span> <span class="o">=</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span><span class="p">(</span><span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s2">&quot;pyarrow-&quot;</span><span class="p">))</span>
<span class="gp">In [7]: </span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"># creating an Arrow Table</span>
<span class="gp">In [8]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">table</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span><span class="p">})</span>
<span class="go"># writing it into two parquet files</span>
<span class="gp">In [9]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset/data1.parquet&quot;</span><span class="p">)</span>
<span class="gp">In [10]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset/data2.parquet&quot;</span><span class="p">)</span>
</pre></div>
</div>
<section id="dataset-discovery">
<h3>Dataset discovery<a class="headerlink" href="#dataset-discovery" title="Permalink to this heading">#</a></h3>
<p>A <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset" title="pyarrow.dataset.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object can be created with the <a class="reference internal" href="generated/pyarrow.dataset.dataset.html#pyarrow.dataset.dataset" title="pyarrow.dataset.dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">dataset()</span></code></a> function. We
can pass it the path to the directory containing the data files:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [11]: </span><span class="kn">import</span> <span class="nn">pyarrow.dataset</span> <span class="k">as</span> <span class="nn">ds</span>
<span class="gp">In [12]: </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">)</span>
<span class="gp">In [13]: </span><span class="n">dataset</span>
<span class="gh">Out[13]: </span><span class="go">&lt;pyarrow._dataset.FileSystemDataset at 0x7fa1f1b88d60&gt;</span>
</pre></div>
</div>
<p>In addition to searching a base directory, <a class="reference internal" href="generated/pyarrow.dataset.dataset.html#pyarrow.dataset.dataset" title="pyarrow.dataset.dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">dataset()</span></code></a> accepts a path to a
single file or a list of file paths.</p>
<p>Creating a <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset" title="pyarrow.dataset.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object does not begin reading the data itself. If
needed, it only crawls the directory to find all the files:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [14]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">files</span>
<span class="gh">Out[14]: </span>
<span class="go">[&#39;/tmp/pyarrow-b58trrt0/parquet_dataset/data1.parquet&#39;,</span>
<span class="go"> &#39;/tmp/pyarrow-b58trrt0/parquet_dataset/data2.parquet&#39;]</span>
</pre></div>
</div>
<p>… and infers the dataset’s schema (by default from the first file):</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [15]: </span><span class="nb">print</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="n">show_field_metadata</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
<span class="go">a: int64</span>
<span class="go">b: double</span>
<span class="go">c: int64</span>
</pre></div>
</div>
<p>Using the <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.to_table" title="pyarrow.dataset.Dataset.to_table"><code class="xref py py-meth docutils literal notranslate"><span class="pre">Dataset.to_table()</span></code></a> method we can read the dataset (or a portion
of it) into a pyarrow Table (note that depending on the size of your dataset
this can require a lot of memory, see below on filtering / iterative loading):</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [16]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">()</span>
<span class="gh">Out[16]: </span>
<span class="go">pyarrow.Table</span>
<span class="go">a: int64</span>
<span class="go">b: double</span>
<span class="go">c: int64</span>
<span class="gt">----</span>
<span class="ne">a</span>: [[0,1,2,3,4],[5,6,7,8,9]]
<span class="ne">b</span>: [[-0.5951790418558823,-0.5414216401443146,0.3002666498322499,-0.11677923333998687,-0.4675703363078683],[0.3778829640687314,-1.340155251996848,2.863185944261404,0.389365350831195,-2.150305815537713]]
<span class="ne">c</span>: [[1,2,1,2,1],[2,1,2,1,2]]
<span class="c1"># converting to pandas to see the contents of the scanned table</span>
<span class="gp">In [17]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[17]: </span>
<span class="go"> a b c</span>
<span class="go">0 0 -0.595179 1</span>
<span class="go">1 1 -0.541422 2</span>
<span class="go">2 2 0.300267 1</span>
<span class="go">3 3 -0.116779 2</span>
<span class="go">4 4 -0.467570 1</span>
<span class="go">5 5 0.377883 2</span>
<span class="go">6 6 -1.340155 1</span>
<span class="go">7 7 2.863186 2</span>
<span class="go">8 8 0.389365 1</span>
<span class="go">9 9 -2.150306 2</span>
</pre></div>
</div>
</section>
<section id="reading-different-file-formats">
<h3>Reading different file formats<a class="headerlink" href="#reading-different-file-formats" title="Permalink to this heading">#</a></h3>
<p>The above examples use Parquet files as dataset sources but the Dataset API
provides a consistent interface across multiple file formats and filesystems.
Currently, Parquet, ORC, Feather / Arrow IPC, and CSV file formats are
supported; more formats are planned in the future.</p>
<p>If we save the table as Feather files instead of Parquet files:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [18]: </span><span class="kn">import</span> <span class="nn">pyarrow.feather</span> <span class="k">as</span> <span class="nn">feather</span>
<span class="gp">In [19]: </span><span class="n">feather</span><span class="o">.</span><span class="n">write_feather</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;data.feather&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>…then we can read the Feather file using the same functions, but with specifying
<code class="docutils literal notranslate"><span class="pre">format=&quot;feather&quot;</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [20]: </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;data.feather&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;feather&quot;</span><span class="p">)</span>
<span class="gp">In [21]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
<span class="gh">Out[21]: </span>
<span class="go"> a b c</span>
<span class="go">0 0 -0.595179 1</span>
<span class="go">1 1 -0.541422 2</span>
<span class="go">2 2 0.300267 1</span>
<span class="go">3 3 -0.116779 2</span>
<span class="go">4 4 -0.467570 1</span>
</pre></div>
</div>
</section>
<section id="customizing-file-formats">
<h3>Customizing file formats<a class="headerlink" href="#customizing-file-formats" title="Permalink to this heading">#</a></h3>
<p>The format name as a string, like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>is short hand for a default constructed <a class="reference internal" href="generated/pyarrow.dataset.ParquetFileFormat.html#pyarrow.dataset.ParquetFileFormat" title="pyarrow.dataset.ParquetFileFormat"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetFileFormat</span></code></a>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">ParquetFileFormat</span><span class="p">())</span>
</pre></div>
</div>
<p>The <a class="reference internal" href="generated/pyarrow.dataset.FileFormat.html#pyarrow.dataset.FileFormat" title="pyarrow.dataset.FileFormat"><code class="xref py py-class docutils literal notranslate"><span class="pre">FileFormat</span></code></a> objects can be customized using keywords. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">parquet_format</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">ParquetFileFormat</span><span class="p">(</span><span class="n">read_options</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;dictionary_columns&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">]})</span>
<span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="n">parquet_format</span><span class="p">)</span>
</pre></div>
</div>
<p>Will configure column <code class="docutils literal notranslate"><span class="pre">&quot;a&quot;</span></code> to be dictionary encoded on scan.</p>
</section>
</section>
<section id="filtering-data">
<span id="py-filter-dataset"></span><h2>Filtering data<a class="headerlink" href="#filtering-data" title="Permalink to this heading">#</a></h2>
<p>To avoid reading all data when only needing a subset, the <code class="docutils literal notranslate"><span class="pre">columns</span></code> and
<code class="docutils literal notranslate"><span class="pre">filter</span></code> keywords can be used.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">columns</span></code> keyword can be used to only read the specified columns:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [22]: </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">)</span>
<span class="gp">In [23]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[23]: </span>
<span class="go"> a b</span>
<span class="go">0 0 -0.595179</span>
<span class="go">1 1 -0.541422</span>
<span class="go">2 2 0.300267</span>
<span class="go">3 3 -0.116779</span>
<span class="go">4 4 -0.467570</span>
<span class="go">5 5 0.377883</span>
<span class="go">6 6 -1.340155</span>
<span class="go">7 7 2.863186</span>
<span class="go">8 8 0.389365</span>
<span class="go">9 9 -2.150306</span>
</pre></div>
</div>
<p>With the <code class="docutils literal notranslate"><span class="pre">filter</span></code> keyword, rows which do not match the filter predicate will
not be included in the returned table. The keyword expects a boolean
<a class="reference internal" href="generated/pyarrow.dataset.Expression.html#pyarrow.dataset.Expression" title="pyarrow.dataset.Expression"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expression</span></code></a> referencing at least one of the columns:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [24]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="nb">filter</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="mi">7</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[24]: </span>
<span class="go"> a b c</span>
<span class="go">0 7 2.863186 2</span>
<span class="go">1 8 0.389365 1</span>
<span class="go">2 9 -2.150306 2</span>
<span class="gp">In [25]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="nb">filter</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[25]: </span>
<span class="go"> a b c</span>
<span class="go">0 1 -0.541422 2</span>
<span class="go">1 3 -0.116779 2</span>
<span class="go">2 5 0.377883 2</span>
<span class="go">3 7 2.863186 2</span>
<span class="go">4 9 -2.150306 2</span>
</pre></div>
</div>
<p>The easiest way to construct those <a class="reference internal" href="generated/pyarrow.dataset.Expression.html#pyarrow.dataset.Expression" title="pyarrow.dataset.Expression"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expression</span></code></a> objects is by using the
<a class="reference internal" href="generated/pyarrow.dataset.field.html#pyarrow.dataset.field" title="pyarrow.dataset.field"><code class="xref py py-func docutils literal notranslate"><span class="pre">field()</span></code></a> helper function. Any column - not just partition columns - can be
referenced using the <a class="reference internal" href="generated/pyarrow.dataset.field.html#pyarrow.dataset.field" title="pyarrow.dataset.field"><code class="xref py py-func docutils literal notranslate"><span class="pre">field()</span></code></a> function (which creates a
<code class="xref py py-class docutils literal notranslate"><span class="pre">FieldExpression</span></code>). Operator overloads are provided to compose filters
including the comparisons (equal, larger/less than, etc), set membership
testing, and boolean combinations (<code class="docutils literal notranslate"><span class="pre">&amp;</span></code>, <code class="docutils literal notranslate"><span class="pre">|</span></code>, <code class="docutils literal notranslate"><span class="pre">~</span></code>):</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [26]: </span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span>
<span class="gh">Out[26]: </span><span class="go">&lt;pyarrow.compute.Expression (a != 3)&gt;</span>
<span class="gp">In [27]: </span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gh">Out[27]: </span>
<span class="go">&lt;pyarrow.compute.Expression is_in(a, {value_set=int64:[</span>
<span class="go"> 1,</span>
<span class="go"> 2,</span>
<span class="go"> 3</span>
<span class="go">], null_matching_behavior=MATCH})&gt;</span>
<span class="gp">In [28]: </span><span class="p">(</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;b&#39;</span><span class="p">))</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;b&#39;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">)</span>
<span class="gh">Out[28]: </span><span class="go">&lt;pyarrow.compute.Expression ((a &gt; b) and (b &gt; 1))&gt;</span>
</pre></div>
</div>
<p>Note that <a class="reference internal" href="generated/pyarrow.dataset.Expression.html#pyarrow.dataset.Expression" title="pyarrow.dataset.Expression"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expression</span></code></a> objects can <strong>not</strong> be combined by python logical
operators <code class="docutils literal notranslate"><span class="pre">and</span></code>, <code class="docutils literal notranslate"><span class="pre">or</span></code> and <code class="docutils literal notranslate"><span class="pre">not</span></code>.</p>
</section>
<section id="projecting-columns">
<h2>Projecting columns<a class="headerlink" href="#projecting-columns" title="Permalink to this heading">#</a></h2>
<p>The <code class="docutils literal notranslate"><span class="pre">columns</span></code> keyword can be used to read a subset of the columns of the
dataset by passing it a list of column names. The keyword can also be used
for more complex projections in combination with expressions.</p>
<p>In this case, we pass it a dictionary with the keys being the resulting
column names and the values the expression that is used to construct the column
values:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [29]: </span><span class="n">projection</span> <span class="o">=</span> <span class="p">{</span>
<span class="gp"> ....: </span> <span class="s2">&quot;a_renamed&quot;</span><span class="p">:</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">),</span>
<span class="gp"> ....: </span> <span class="s2">&quot;b_as_float32&quot;</span><span class="p">:</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;float32&quot;</span><span class="p">),</span>
<span class="gp"> ....: </span> <span class="s2">&quot;c_1&quot;</span><span class="p">:</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;c&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span>
<span class="gp"> ....: </span><span class="p">}</span>
<span class="gp"> ....: </span>
<span class="gp">In [30]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">projection</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
<span class="gh">Out[30]: </span>
<span class="go"> a_renamed b_as_float32 c_1</span>
<span class="go">0 0 -0.595179 True</span>
<span class="go">1 1 -0.541422 False</span>
<span class="go">2 2 0.300267 True</span>
<span class="go">3 3 -0.116779 False</span>
<span class="go">4 4 -0.467570 True</span>
</pre></div>
</div>
<p>The dictionary also determines the column selection (only the keys in the
dictionary will be present as columns in the resulting table). If you want
to include a derived column in <em>addition</em> to the existing columns, you can
build up the dictionary from the dataset schema:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [31]: </span><span class="n">projection</span> <span class="o">=</span> <span class="p">{</span><span class="n">col</span><span class="p">:</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">names</span><span class="p">}</span>
<span class="gp">In [32]: </span><span class="n">projection</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s2">&quot;b_large&quot;</span><span class="p">:</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">})</span>
<span class="gp">In [33]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">projection</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
<span class="gh">Out[33]: </span>
<span class="go"> a b c b_large</span>
<span class="go">0 0 -0.595179 1 False</span>
<span class="go">1 1 -0.541422 2 False</span>
<span class="go">2 2 0.300267 1 False</span>
<span class="go">3 3 -0.116779 2 False</span>
<span class="go">4 4 -0.467570 1 False</span>
</pre></div>
</div>
</section>
<section id="reading-partitioned-data">
<h2>Reading partitioned data<a class="headerlink" href="#reading-partitioned-data" title="Permalink to this heading">#</a></h2>
<p>Above, a dataset consisting of a flat directory with files was shown. However, a
dataset can exploit a nested directory structure defining a partitioned dataset,
where the sub-directory names hold information about which subset of the data is
stored in that directory.</p>
<p>For example, a dataset partitioned by year and month may look like on disk:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>dataset_name/
year=2007/
month=01/
data0.parquet
data1.parquet
...
month=02/
data0.parquet
data1.parquet
...
month=03/
...
year=2008/
month=01/
...
...
</pre></div>
</div>
<p>The above partitioning scheme is using “/key=value/” directory names, as found
in Apache Hive.</p>
<p>Let’s create a small partitioned dataset. The <a class="reference internal" href="generated/pyarrow.parquet.write_to_dataset.html#pyarrow.parquet.write_to_dataset" title="pyarrow.parquet.write_to_dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_to_dataset()</span></code></a>
function can write such hive-like partitioned datasets.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [34]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">table</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span><span class="p">,</span>
<span class="gp"> ....: </span> <span class="s1">&#39;part&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">+</span> <span class="p">[</span><span class="s1">&#39;b&#39;</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span><span class="p">})</span>
<span class="gp"> ....: </span>
<span class="gp">In [35]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_to_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s2">&quot;parquet_dataset_partitioned&quot;</span><span class="p">,</span>
<span class="gp"> ....: </span> <span class="n">partition_cols</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;part&#39;</span><span class="p">])</span>
<span class="gp"> ....: </span>
</pre></div>
</div>
<p>The above created a directory with two subdirectories (“part=a” and “part=b”),
and the Parquet files written in those directories no longer include the “part”
column.</p>
<p>Reading this dataset with <a class="reference internal" href="generated/pyarrow.dataset.dataset.html#pyarrow.dataset.dataset" title="pyarrow.dataset.dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">dataset()</span></code></a>, we now specify that the dataset
should use a hive-like partitioning scheme with the <code class="docutils literal notranslate"><span class="pre">partitioning</span></code> keyword:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [36]: </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="s2">&quot;parquet_dataset_partitioned&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">,</span>
<span class="gp"> ....: </span> <span class="n">partitioning</span><span class="o">=</span><span class="s2">&quot;hive&quot;</span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [37]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">files</span>
<span class="gh">Out[37]: </span>
<span class="go">[&#39;parquet_dataset_partitioned/part=a/01b6f2ba881d411fa725c84b95cbc620-0.parquet&#39;,</span>
<span class="go"> &#39;parquet_dataset_partitioned/part=b/01b6f2ba881d411fa725c84b95cbc620-0.parquet&#39;]</span>
</pre></div>
</div>
<p>Although the partition fields are not included in the actual Parquet files,
they will be added back to the resulting table when scanning this dataset:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [38]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
<span class="gh">Out[38]: </span>
<span class="go"> a b c part</span>
<span class="go">0 0 -0.581390 1 a</span>
<span class="go">1 1 0.111059 2 a</span>
<span class="go">2 2 0.857418 1 a</span>
</pre></div>
</div>
<p>We can now filter on the partition keys, which avoids loading files
altogether if they do not match the filter:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [39]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="nb">filter</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;part&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="s2">&quot;b&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[39]: </span>
<span class="go"> a b c part</span>
<span class="go">0 5 -0.641342 2 b</span>
<span class="go">1 6 0.692135 1 b</span>
<span class="go">2 7 -1.092980 2 b</span>
<span class="go">3 8 -0.769477 1 b</span>
<span class="go">4 9 -1.764723 2 b</span>
</pre></div>
</div>
<section id="different-partitioning-schemes">
<h3>Different partitioning schemes<a class="headerlink" href="#different-partitioning-schemes" title="Permalink to this heading">#</a></h3>
<p>The above example uses a hive-like directory scheme, such as “/year=2009/month=11/day=15”.
We specified this passing the <code class="docutils literal notranslate"><span class="pre">partitioning=&quot;hive&quot;</span></code> keyword. In this case,
the types of the partition keys are inferred from the file paths.</p>
<p>It is also possible to explicitly define the schema of the partition keys
using the <a class="reference internal" href="generated/pyarrow.dataset.partitioning.html#pyarrow.dataset.partitioning" title="pyarrow.dataset.partitioning"><code class="xref py py-func docutils literal notranslate"><span class="pre">partitioning()</span></code></a> function. For example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">partitioning</span><span class="p">(</span>
<span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;year&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">()),</span> <span class="p">(</span><span class="s2">&quot;month&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">()),</span> <span class="p">(</span><span class="s2">&quot;day&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">())]),</span>
<span class="n">flavor</span><span class="o">=</span><span class="s2">&quot;hive&quot;</span>
<span class="p">)</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">part</span><span class="p">)</span>
</pre></div>
</div>
<p>“Directory partitioning” is also supported, where the segments in the file path
represent the values of the partition keys without including the name (the
field name are implicit in the segment’s index). For example, given field names
“year”, “month”, and “day”, one path might be “/2019/11/15”.</p>
<p>Since the names are not included in the file paths, these must be specified
when constructing a directory partitioning:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">partitioning</span><span class="p">(</span><span class="n">field_names</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;year&quot;</span><span class="p">,</span> <span class="s2">&quot;month&quot;</span><span class="p">,</span> <span class="s2">&quot;day&quot;</span><span class="p">])</span>
</pre></div>
</div>
<p>Directory partitioning also supports providing a full schema rather than inferring
types from file paths.</p>
</section>
</section>
<section id="reading-from-cloud-storage">
<h2>Reading from cloud storage<a class="headerlink" href="#reading-from-cloud-storage" title="Permalink to this heading">#</a></h2>
<p>In addition to local files, pyarrow also supports reading from cloud storage.
Currently, <a class="reference internal" href="generated/pyarrow.fs.HadoopFileSystem.html#pyarrow.fs.HadoopFileSystem" title="pyarrow.fs.HadoopFileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">HDFS</span></code></a> and
<a class="reference internal" href="generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem" title="pyarrow.fs.S3FileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">Amazon</span> <span class="pre">S3-compatible</span> <span class="pre">storage</span></code></a> are supported.</p>
<p>When passing a file URI, the file system will be inferred. For example,
specifying a S3 path:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="s2">&quot;s3://voltrondata-labs-datasets/nyc-taxi/&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>Typically, you will want to customize the connection parameters, and then
a file system object can be created and passed to the <code class="docutils literal notranslate"><span class="pre">filesystem</span></code> keyword:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyarrow</span> <span class="kn">import</span> <span class="n">fs</span>
<span class="n">s3</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">S3FileSystem</span><span class="p">(</span><span class="n">region</span><span class="o">=</span><span class="s2">&quot;us-east-2&quot;</span><span class="p">)</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="s2">&quot;voltrondata-labs-datasets/nyc-taxi/&quot;</span><span class="p">,</span> <span class="n">filesystem</span><span class="o">=</span><span class="n">s3</span><span class="p">)</span>
</pre></div>
</div>
<p>The currently available classes are <a class="reference internal" href="generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem" title="pyarrow.fs.S3FileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">S3FileSystem</span></code></a> and
<a class="reference internal" href="generated/pyarrow.fs.HadoopFileSystem.html#pyarrow.fs.HadoopFileSystem" title="pyarrow.fs.HadoopFileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">HadoopFileSystem</span></code></a>. See the <a class="reference internal" href="filesystems.html#filesystem"><span class="std std-ref">Filesystem Interface</span></a> docs for more
details.</p>
</section>
<section id="reading-from-minio">
<h2>Reading from Minio<a class="headerlink" href="#reading-from-minio" title="Permalink to this heading">#</a></h2>
<p>In addition to cloud storage, pyarrow also supports reading from a
<a class="reference external" href="https://github.com/minio/minio">MinIO</a> object storage instance emulating S3
APIs. Paired with <a class="reference external" href="https://github.com/shopify/toxiproxy">toxiproxy</a>, this is
useful for testing or benchmarking.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyarrow</span> <span class="kn">import</span> <span class="n">fs</span>
<span class="c1"># By default, MinIO will listen for unencrypted HTTP traffic.</span>
<span class="n">minio</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">S3FileSystem</span><span class="p">(</span><span class="n">scheme</span><span class="o">=</span><span class="s2">&quot;http&quot;</span><span class="p">,</span> <span class="n">endpoint_override</span><span class="o">=</span><span class="s2">&quot;localhost:9000&quot;</span><span class="p">)</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="s2">&quot;voltrondata-labs-datasets/nyc-taxi/&quot;</span><span class="p">,</span> <span class="n">filesystem</span><span class="o">=</span><span class="n">minio</span><span class="p">)</span>
</pre></div>
</div>
</section>
<section id="working-with-parquet-datasets">
<h2>Working with Parquet Datasets<a class="headerlink" href="#working-with-parquet-datasets" title="Permalink to this heading">#</a></h2>
<p>While the Datasets API provides a unified interface to different file formats,
some specific methods exist for Parquet Datasets.</p>
<p>Some processing frameworks such as Dask (optionally) use a <code class="docutils literal notranslate"><span class="pre">_metadata</span></code> file
with partitioned datasets which includes information about the schema and the
row group metadata of the full dataset. Using such a file can give a more
efficient creation of a parquet Dataset, since it does not need to infer the
schema and crawl the directories for all Parquet files (this is especially the
case for filesystems where accessing files is expensive). The
<a class="reference internal" href="generated/pyarrow.dataset.parquet_dataset.html#pyarrow.dataset.parquet_dataset" title="pyarrow.dataset.parquet_dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">parquet_dataset()</span></code></a> function allows us to create a Dataset from a partitioned
dataset with a <code class="docutils literal notranslate"><span class="pre">_metadata</span></code> file:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">parquet_dataset</span><span class="p">(</span><span class="s2">&quot;/path/to/dir/_metadata&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>By default, the constructed <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset" title="pyarrow.dataset.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a> object for Parquet datasets maps
each fragment to a single Parquet file. If you want fragments mapping to each
row group of a Parquet file, you can use the <code class="docutils literal notranslate"><span class="pre">split_by_row_group()</span></code> method of
the fragments:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">fragments</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">get_fragments</span><span class="p">())</span>
<span class="n">fragments</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split_by_row_group</span><span class="p">()</span>
</pre></div>
</div>
<p>This method returns a list of new Fragments mapping to each row group of
the original Fragment (Parquet file). Both <code class="docutils literal notranslate"><span class="pre">get_fragments()</span></code> and
<code class="docutils literal notranslate"><span class="pre">split_by_row_group()</span></code> accept an optional filter expression to get a
filtered list of fragments.</p>
</section>
<section id="manual-specification-of-the-dataset">
<h2>Manual specification of the Dataset<a class="headerlink" href="#manual-specification-of-the-dataset" title="Permalink to this heading">#</a></h2>
<p>The <a class="reference internal" href="generated/pyarrow.dataset.dataset.html#pyarrow.dataset.dataset" title="pyarrow.dataset.dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">dataset()</span></code></a> function allows easy creation of a Dataset viewing a directory,
crawling all subdirectories for files and partitioning information. However
sometimes discovery is not required and the dataset’s files and partitions
are already known (for example, when this information is stored in metadata).
In this case it is possible to create a Dataset explicitly without any
automatic discovery or inference.</p>
<p>For the example here, we are going to use a dataset where the file names contain
additional partitioning information:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="go"># creating a dummy dataset: directory with two files</span>
<span class="gp">In [40]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">table</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">),</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">3</span><span class="p">)})</span>
<span class="gp">In [41]: </span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset_manual&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">In [42]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset_manual&quot;</span> <span class="o">/</span> <span class="s2">&quot;data_2018.parquet&quot;</span><span class="p">)</span>
<span class="gp">In [43]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset_manual&quot;</span> <span class="o">/</span> <span class="s2">&quot;data_2019.parquet&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>To create a Dataset from a list of files, we need to specify the paths, schema,
format, filesystem, and partition expressions manually:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [44]: </span><span class="kn">from</span> <span class="nn">pyarrow</span> <span class="kn">import</span> <span class="n">fs</span>
<span class="gp">In [45]: </span><span class="n">schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;year&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span> <span class="p">(</span><span class="s2">&quot;col1&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span> <span class="p">(</span><span class="s2">&quot;col2&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">float64</span><span class="p">())])</span>
<span class="gp">In [46]: </span><span class="n">dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">FileSystemDataset</span><span class="o">.</span><span class="n">from_paths</span><span class="p">(</span>
<span class="gp"> ....: </span> <span class="p">[</span><span class="s2">&quot;data_2018.parquet&quot;</span><span class="p">,</span> <span class="s2">&quot;data_2019.parquet&quot;</span><span class="p">],</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">ParquetFileFormat</span><span class="p">(),</span>
<span class="gp"> ....: </span> <span class="n">filesystem</span><span class="o">=</span><span class="n">fs</span><span class="o">.</span><span class="n">SubTreeFileSystem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">base</span> <span class="o">/</span> <span class="s2">&quot;parquet_dataset_manual&quot;</span><span class="p">),</span> <span class="n">fs</span><span class="o">.</span><span class="n">LocalFileSystem</span><span class="p">()),</span>
<span class="gp"> ....: </span> <span class="n">partitions</span><span class="o">=</span><span class="p">[</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;year&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2018</span><span class="p">,</span> <span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;year&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2019</span><span class="p">])</span>
<span class="gp"> ....: </span>
</pre></div>
</div>
<p>Since we specified the “partition expressions” for our files, this information
is materialized as columns when reading the data and can be used for filtering:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [47]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[47]: </span>
<span class="go"> year col1 col2</span>
<span class="go">0 2018 0 -1.546696</span>
<span class="go">1 2018 1 -0.378167</span>
<span class="go">2 2018 2 0.188676</span>
<span class="go">3 2019 0 -1.546696</span>
<span class="go">4 2019 1 -0.378167</span>
<span class="go">5 2019 2 0.188676</span>
<span class="gp">In [48]: </span><span class="n">dataset</span><span class="o">.</span><span class="n">to_table</span><span class="p">(</span><span class="nb">filter</span><span class="o">=</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;year&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2019</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[48]: </span>
<span class="go"> year col1 col2</span>
<span class="go">0 2019 0 -1.546696</span>
<span class="go">1 2019 1 -0.378167</span>
<span class="go">2 2019 2 0.188676</span>
</pre></div>
</div>
<p>Another benefit of manually listing the files is that the order of the files
controls the order of the data. When performing an ordered read (or a read to
a table) then the rows returned will match the order of the files given. This
only applies when the dataset is constructed with a list of files. There
are no order guarantees given when the files are instead discovered by scanning
a directory.</p>
</section>
<section id="iterative-out-of-core-or-streaming-reads">
<h2>Iterative (out of core or streaming) reads<a class="headerlink" href="#iterative-out-of-core-or-streaming-reads" title="Permalink to this heading">#</a></h2>
<p>The previous examples have demonstrated how to read the data into a table using <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.to_table" title="pyarrow.dataset.Dataset.to_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_table()</span></code></a>. This is
useful if the dataset is small or there is only a small amount of data that needs to
be read. The dataset API contains additional methods to read and process large amounts
of data in a streaming fashion.</p>
<p>The easiest way to do this is to use the method <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.to_batches" title="pyarrow.dataset.Dataset.to_batches"><code class="xref py py-meth docutils literal notranslate"><span class="pre">Dataset.to_batches()</span></code></a>. This
method returns an iterator of record batches. For example, we can use this method to
calculate the average of a column without loading the entire column into memory:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [49]: </span><span class="kn">import</span> <span class="nn">pyarrow.compute</span> <span class="k">as</span> <span class="nn">pc</span>
<span class="gp">In [50]: </span><span class="n">col2_sum</span> <span class="o">=</span> <span class="mi">0</span>
<span class="gp">In [51]: </span><span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
<span class="gp">In [52]: </span><span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">to_batches</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col2&quot;</span><span class="p">],</span> <span class="nb">filter</span><span class="o">=~</span><span class="n">ds</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">&quot;col2&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">is_null</span><span class="p">()):</span>
<span class="gp"> ....: </span> <span class="n">col2_sum</span> <span class="o">+=</span> <span class="n">pc</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">batch</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="s2">&quot;col2&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span>
<span class="gp"> ....: </span> <span class="n">count</span> <span class="o">+=</span> <span class="n">batch</span><span class="o">.</span><span class="n">num_rows</span>
<span class="gp"> ....: </span>
<span class="gp">In [53]: </span><span class="n">mean_a</span> <span class="o">=</span> <span class="n">col2_sum</span><span class="o">/</span><span class="n">count</span>
</pre></div>
</div>
<section id="customizing-the-batch-size">
<h3>Customizing the batch size<a class="headerlink" href="#customizing-the-batch-size" title="Permalink to this heading">#</a></h3>
<p>An iterative read of a dataset is often called a “scan” of the dataset and pyarrow
uses an object called a <a class="reference internal" href="generated/pyarrow.dataset.Scanner.html#pyarrow.dataset.Scanner" title="pyarrow.dataset.Scanner"><code class="xref py py-class docutils literal notranslate"><span class="pre">Scanner</span></code></a> to do this. A Scanner is created for you
automatically by the <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.to_table" title="pyarrow.dataset.Dataset.to_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_table()</span></code></a> and <a class="reference internal" href="generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.to_batches" title="pyarrow.dataset.Dataset.to_batches"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_batches()</span></code></a> method of the dataset.
Any arguments you pass to these methods will be passed on to the Scanner constructor.</p>
<p>One of those parameters is the <code class="docutils literal notranslate"><span class="pre">batch_size</span></code>. This controls the maximum size of the
batches returned by the scanner. Batches can still be smaller than the <code class="docutils literal notranslate"><span class="pre">batch_size</span></code>
if the dataset consists of small files or those files themselves consist of small
row groups. For example, a parquet file with 10,000 rows per row group will yield
batches with, at most, 10,000 rows unless the <code class="docutils literal notranslate"><span class="pre">batch_size</span></code> is set to a smaller value.</p>
<p>The default batch size is one million rows and this is typically a good default but
you may want to customize it if you are reading a large number of columns.</p>
</section>
</section>
<section id="a-note-on-transactions-acid-guarantees">
<h2>A note on transactions &amp; ACID guarantees<a class="headerlink" href="#a-note-on-transactions-acid-guarantees" title="Permalink to this heading">#</a></h2>
<p>The dataset API offers no transaction support or any ACID guarantees. This affects
both reading and writing. Concurrent reads are fine. Concurrent writes or writes
concurring with reads may have unexpected behavior. Various approaches can be used
to avoid operating on the same files such as using a unique basename template for
each writer, a temporary directory for new files, or separate storage of the file
list instead of relying on directory discovery.</p>
<p>Unexpectedly killing the process while a write is in progress can leave the system
in an inconsistent state. Write calls generally return as soon as the bytes to be
written have been completely delivered to the OS page cache. Even though a write
operation has been completed it is possible for part of the file to be lost if
there is a sudden power loss immediately after the write call.</p>
<p>Most file formats have magic numbers which are written at the end. This means a
partial file write can safely be detected and discarded. The CSV file format does
not have any such concept and a partially written CSV file may be detected as valid.</p>
</section>
<section id="writing-datasets">
<h2>Writing Datasets<a class="headerlink" href="#writing-datasets" title="Permalink to this heading">#</a></h2>
<p>The dataset API also simplifies writing data to a dataset using <a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_dataset()</span></code></a> . This can be useful when
you want to partition your data or you need to write a large amount of data. A
basic dataset write is similar to writing a table except that you specify a directory
instead of a filename.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [54]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">table</span><span class="p">({</span><span class="s2">&quot;a&quot;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s2">&quot;b&quot;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10</span><span class="p">),</span> <span class="s2">&quot;c&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">5</span><span class="p">})</span>
<span class="gp">In [55]: </span><span class="n">ds</span><span class="o">.</span><span class="n">write_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s2">&quot;sample_dataset&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>The above example will create a single file named part-0.parquet in our sample_dataset
directory.</p>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>If you run the example again it will replace the existing part-0.parquet file.
Appending files to an existing dataset requires specifying a new
<code class="docutils literal notranslate"><span class="pre">basename_template</span></code> for each call to <code class="docutils literal notranslate"><span class="pre">ds.write_dataset</span></code>
to avoid overwrite.</p>
</div>
<section id="writing-partitioned-data">
<h3>Writing partitioned data<a class="headerlink" href="#writing-partitioned-data" title="Permalink to this heading">#</a></h3>
<p>A partitioning object can be used to specify how your output data should be partitioned.
This uses the same kind of partitioning objects we used for reading datasets. To write
our above data out to a partitioned directory we only need to specify how we want the
dataset to be partitioned. For example:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [56]: </span><span class="n">part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">partitioning</span><span class="p">(</span>
<span class="gp"> ....: </span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">())]),</span> <span class="n">flavor</span><span class="o">=</span><span class="s2">&quot;hive&quot;</span>
<span class="gp"> ....: </span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [57]: </span><span class="n">ds</span><span class="o">.</span><span class="n">write_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s2">&quot;partitioned_dataset&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">part</span><span class="p">)</span>
</pre></div>
</div>
<p>This will create two files. Half our data will be in the dataset_root/c=1 directory and
the other half will be in the dataset_root/c=2 directory.</p>
</section>
<section id="partitioning-performance-considerations">
<h3>Partitioning performance considerations<a class="headerlink" href="#partitioning-performance-considerations" title="Permalink to this heading">#</a></h3>
<p>Partitioning datasets has two aspects that affect performance: it increases the number of
files and it creates a directory structure around the files. Both of these have benefits
as well as costs. Depending on the configuration and the size of your dataset, the costs
can outweigh the benefits.</p>
<p>Because partitions split up the dataset into multiple files, partitioned datasets can be
read and written with parallelism. However, each additional file adds a little overhead in
processing for filesystem interaction. It also increases the overall dataset size since
each file has some shared metadata. For example, each parquet file contains the schema and
group-level statistics. The number of partitions is a floor for the number of files. If
you partition a dataset by date with a year of data, you will have at least 365 files. If
you further partition by another dimension with 1,000 unique values, you will have up to
365,000 files. This fine of partitioning often leads to small files that mostly consist of
metadata.</p>
<p>Partitioned datasets create nested folder structures, and those allow us to prune which
files are loaded in a scan. However, this adds overhead to discovering files in the dataset,
as we’ll need to recursively “list directory” to find the data files. Too fine
partitions can cause problems here: Partitioning a dataset by date for a years worth
of data will require 365 list calls to find all the files; adding another column with
cardinality 1,000 will make that 365,365 calls.</p>
<p>The most optimal partitioning layout will depend on your data, access patterns, and which
systems will be reading the data. Most systems, including Arrow, should work across a
range of file sizes and partitioning layouts, but there are extremes you should avoid. These
guidelines can help avoid some known worst cases:</p>
<ul class="simple">
<li><p>Avoid files smaller than 20MB and larger than 2GB.</p></li>
<li><p>Avoid partitioning layouts with more than 10,000 distinct partitions.</p></li>
</ul>
<p>For file formats that have a notion of groups within a file, such as Parquet, similar
guidelines apply. Row groups can provide parallelism when reading and allow data skipping
based on statistics, but very small groups can cause metadata to be a significant portion
of file size. Arrow’s file writer provides sensible defaults for group sizing in most cases.</p>
</section>
<section id="configuring-files-open-during-a-write">
<h3>Configuring files open during a write<a class="headerlink" href="#configuring-files-open-during-a-write" title="Permalink to this heading">#</a></h3>
<p>When writing data to the disk, there are a few parameters that can be
important to optimize the writes, such as the number of rows per file and
the maximum number of open files allowed during the write.</p>
<p>Set the maximum number of files opened with the <code class="docutils literal notranslate"><span class="pre">max_open_files</span></code> parameter of
<a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write_dataset()</span></code></a>.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">max_open_files</span></code> is set greater than 0 then this will limit the maximum
number of files that can be left open. This only applies to writing partitioned
datasets, where rows are dispatched to the appropriate file depending on their
partition values. If an attempt is made to open too many files then the least
recently used file will be closed. If this setting is set too low you may end
up fragmenting your data into many small files.</p>
<p>If your process is concurrently using other file handlers, either with a
dataset scanner or otherwise, you may hit a system file handler limit. For
example, if you are scanning a dataset with 300 files and writing out to
900 files, the total of 1200 files may be over a system limit. (On Linux,
this might be a “Too Many Open Files” error.) You can either reduce this
<code class="docutils literal notranslate"><span class="pre">max_open_files</span></code> setting or increase the file handler limit on your
system. The default value is 900 which allows some number of files
to be open by the scanner before hitting the default Linux limit of 1024.</p>
<p>Another important configuration used in <a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write_dataset()</span></code></a> is <code class="docutils literal notranslate"><span class="pre">max_rows_per_file</span></code>.</p>
<p>Set the maximum number of rows written in each file with the <code class="docutils literal notranslate"><span class="pre">max_rows_per_files</span></code>
parameter of <a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write_dataset()</span></code></a>.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">max_rows_per_file</span></code> is set greater than 0 then this will limit how many
rows are placed in any single file. Otherwise there will be no limit and one
file will be created in each output directory unless files need to be closed to respect
<code class="docutils literal notranslate"><span class="pre">max_open_files</span></code>. This setting is the primary way to control file size.
For workloads writing a lot of data, files can get very large without a
row count cap, leading to out-of-memory errors in downstream readers. The
relationship between row count and file size depends on the dataset schema
and how well compressed (if at all) the data is.</p>
</section>
<section id="configuring-rows-per-group-during-a-write">
<h3>Configuring rows per group during a write<a class="headerlink" href="#configuring-rows-per-group-during-a-write" title="Permalink to this heading">#</a></h3>
<p>The volume of data written to the disk per each group can be configured.
This configuration includes a lower and an upper bound.
The minimum number of rows required to form a row group is
defined with the <code class="docutils literal notranslate"><span class="pre">min_rows_per_group</span></code> parameter of <a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write_dataset()</span></code></a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If <code class="docutils literal notranslate"><span class="pre">min_rows_per_group</span></code> is set greater than 0 then this will cause the
dataset writer to batch incoming data and only write the row groups to the
disk when sufficient rows have accumulated. The final row group size may be
less than this value if other options such as <code class="docutils literal notranslate"><span class="pre">max_open_files</span></code> or
<code class="docutils literal notranslate"><span class="pre">max_rows_per_file</span></code> force smaller row group sizes.</p>
</div>
<p>The maximum number of rows allowed per group is defined with the
<code class="docutils literal notranslate"><span class="pre">max_rows_per_group</span></code> parameter of <a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html#pyarrow.dataset.write_dataset" title="pyarrow.dataset.write_dataset"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write_dataset()</span></code></a>.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">max_rows_per_group</span></code> is set greater than 0 then the dataset writer may split
up large incoming batches into multiple row groups. If this value is set then
<code class="docutils literal notranslate"><span class="pre">min_rows_per_group</span></code> should also be set or else you may end up with very small
row groups (e.g. if the incoming row group size is just barely larger than this value).</p>
<p>Row groups are built into the Parquet and IPC/Feather formats but don’t affect JSON or CSV.
When reading back Parquet and IPC formats in Arrow, the row group boundaries become the
record batch boundaries, determining the default batch size of downstream readers.
Additionally, row groups in Parquet files have column statistics which can help readers
skip irrelevant data but can add size to the file. As an extreme example, if one sets
max_rows_per_group=1 in Parquet, they will have large files because most of the files
will be row group statistics.</p>
</section>
<section id="writing-large-amounts-of-data">
<h3>Writing large amounts of data<a class="headerlink" href="#writing-large-amounts-of-data" title="Permalink to this heading">#</a></h3>
<p>The above examples wrote data from a table. If you are writing a large amount of data
you may not be able to load everything into a single in-memory table. Fortunately, the
<code class="xref py py-func docutils literal notranslate"><span class="pre">write_dataset()</span></code> method also accepts an iterable of record batches. This makes it really
simple, for example, to repartition a large dataset without loading the entire dataset
into memory:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [58]: </span><span class="n">old_part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">partitioning</span><span class="p">(</span>
<span class="gp"> ....: </span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">())]),</span> <span class="n">flavor</span><span class="o">=</span><span class="s2">&quot;hive&quot;</span>
<span class="gp"> ....: </span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [59]: </span><span class="n">new_part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">partitioning</span><span class="p">(</span>
<span class="gp"> ....: </span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">())]),</span> <span class="n">flavor</span><span class="o">=</span><span class="kc">None</span>
<span class="gp"> ....: </span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [60]: </span><span class="n">input_dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="s2">&quot;partitioned_dataset&quot;</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">old_part</span><span class="p">)</span>
<span class="go"># A scanner can act as an iterator of record batches but you could also receive</span>
<span class="go"># data from the network (e.g. via flight), from your own scanning, or from any</span>
<span class="go"># other method that yields record batches. In addition, you can pass a dataset</span>
<span class="go"># into write_dataset directly but this method is useful if you want to customize</span>
<span class="go"># the scanner (e.g. to filter the input dataset or set a maximum batch size)</span>
<span class="gp">In [61]: </span><span class="n">scanner</span> <span class="o">=</span> <span class="n">input_dataset</span><span class="o">.</span><span class="n">scanner</span><span class="p">()</span>
<span class="gp">In [62]: </span><span class="n">ds</span><span class="o">.</span><span class="n">write_dataset</span><span class="p">(</span><span class="n">scanner</span><span class="p">,</span> <span class="s2">&quot;repartitioned_dataset&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">new_part</span><span class="p">)</span>
</pre></div>
</div>
<p>After the above example runs our data will be in dataset_root/1 and dataset_root/2
directories. In this simple example we are not changing the structure of the data
(only the directory naming schema) but you could also use this mechanism to change
which columns are used to partition the dataset. This is useful when you expect to
query your data in specific ways and you can utilize partitioning to reduce the
amount of data you need to read.</p>
</section>
<section id="customizing-inspecting-written-files">
<h3>Customizing &amp; inspecting written files<a class="headerlink" href="#customizing-inspecting-written-files" title="Permalink to this heading">#</a></h3>
<p>By default the dataset API will create files named “part-i.format” where “i” is a integer
generated during the write and “format” is the file format specified in the write_dataset
call. For simple datasets it may be possible to know which files will be created but for
larger or partitioned datasets it is not so easy. The <code class="docutils literal notranslate"><span class="pre">file_visitor</span></code> keyword can be used
to supply a visitor that will be called as each file is created:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [63]: </span><span class="k">def</span> <span class="nf">file_visitor</span><span class="p">(</span><span class="n">written_file</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;path=</span><span class="si">{</span><span class="n">written_file</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="gp"> ....: </span> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;size=</span><span class="si">{</span><span class="n">written_file</span><span class="o">.</span><span class="n">size</span><span class="si">}</span><span class="s2"> bytes&quot;</span><span class="p">)</span>
<span class="gp"> ....: </span> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;metadata=</span><span class="si">{</span><span class="n">written_file</span><span class="o">.</span><span class="n">metadata</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="gp"> ....: </span>
</pre></div>
</div>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [64]: </span><span class="n">ds</span><span class="o">.</span><span class="n">write_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s2">&quot;dataset_visited&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">part</span><span class="p">,</span>
<span class="gp"> ....: </span> <span class="n">file_visitor</span><span class="o">=</span><span class="n">file_visitor</span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="go">path=dataset_visited/c=2/part-0.parquet</span>
<span class="go">size=990 bytes</span>
<span class="go">metadata=&lt;pyarrow._parquet.FileMetaData object at 0x7fa1f1d2c630&gt;</span>
<span class="go"> created_by: parquet-cpp-arrow version 17.0.0-SNAPSHOT</span>
<span class="go"> num_columns: 2</span>
<span class="go"> num_rows: 5</span>
<span class="go"> num_row_groups: 1</span>
<span class="go"> format_version: 2.6</span>
<span class="go"> serialized_size: 0</span>
<span class="go">path=dataset_visited/c=1/part-0.parquet</span>
<span class="go">size=988 bytes</span>
<span class="go">metadata=&lt;pyarrow._parquet.FileMetaData object at 0x7fa1f1d2c630&gt;</span>
<span class="go"> created_by: parquet-cpp-arrow version 17.0.0-SNAPSHOT</span>
<span class="go"> num_columns: 2</span>
<span class="go"> num_rows: 5</span>
<span class="go"> num_row_groups: 1</span>
<span class="go"> format_version: 2.6</span>
<span class="go"> serialized_size: 0</span>
</pre></div>
</div>
<p>This will allow you to collect the filenames that belong to the dataset and store them elsewhere
which can be useful when you want to avoid scanning directories the next time you need to read
the data. It can also be used to generate the _metadata index file used by other tools such as
Dask or Spark to create an index of the dataset.</p>
</section>
<section id="configuring-format-specific-parameters-during-a-write">
<h3>Configuring format-specific parameters during a write<a class="headerlink" href="#configuring-format-specific-parameters-during-a-write" title="Permalink to this heading">#</a></h3>
<p>In addition to the common options shared by all formats there are also format specific options
that are unique to a particular format. For example, to allow truncated timestamps while writing
Parquet files:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [65]: </span><span class="n">parquet_format</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">ParquetFileFormat</span><span class="p">()</span>
<span class="gp">In [66]: </span><span class="n">write_options</span> <span class="o">=</span> <span class="n">parquet_format</span><span class="o">.</span><span class="n">make_write_options</span><span class="p">(</span><span class="n">allow_truncated_timestamps</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">In [67]: </span><span class="n">ds</span><span class="o">.</span><span class="n">write_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s2">&quot;sample_dataset2&quot;</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s2">&quot;parquet&quot;</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="n">part</span><span class="p">,</span>
<span class="gp"> ....: </span> <span class="n">file_options</span><span class="o">=</span><span class="n">write_options</span><span class="p">)</span>
<span class="gp"> ....: </span>
</pre></div>
</div>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="parquet.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Reading and Writing the Apache Parquet Format</p>
</div>
</a>
<a class="right-next"
href="flight.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Arrow Flight RPC</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-datasets">Reading Datasets</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-discovery">Dataset discovery</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-different-file-formats">Reading different file formats</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#customizing-file-formats">Customizing file formats</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#filtering-data">Filtering data</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#projecting-columns">Projecting columns</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-partitioned-data">Reading partitioned data</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#different-partitioning-schemes">Different partitioning schemes</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-from-cloud-storage">Reading from cloud storage</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-from-minio">Reading from Minio</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#working-with-parquet-datasets">Working with Parquet Datasets</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#manual-specification-of-the-dataset">Manual specification of the Dataset</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#iterative-out-of-core-or-streaming-reads">Iterative (out of core or streaming) reads</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#customizing-the-batch-size">Customizing the batch size</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#a-note-on-transactions-acid-guarantees">A note on transactions &amp; ACID guarantees</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-datasets">Writing Datasets</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-partitioned-data">Writing partitioned data</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#partitioning-performance-considerations">Partitioning performance considerations</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#configuring-files-open-during-a-write">Configuring files open during a write</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#configuring-rows-per-group-during-a-write">Configuring rows per group during a write</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-large-amounts-of-data">Writing large amounts of data</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#customizing-inspecting-written-files">Customizing &amp; inspecting written files</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#configuring-format-specific-parameters-during-a-write">Configuring format-specific parameters during a write</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/python/dataset.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>