<!-- blob: 48b566d4eb90f152c52f352eea2da4b1f671226a [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Reading and Writing the Apache Parquet Format &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
// Copy the stored "mode" and "theme" preferences onto <html> as data
// attributes. This script sits ahead of the stylesheet links in <head>.
// Reading localStorage can throw (for example a SecurityError when site
// storage is blocked), so fall back to the defaults ("" and "light")
// instead of aborting the script.
(function () {
  var root = document.documentElement;
  var mode = "";
  var theme = "light";
  try {
    mode = localStorage.getItem("mode") || mode;
    theme = localStorage.getItem("theme") || theme;
  } catch (err) {
    // Storage is unavailable: keep the defaults assigned above.
  }
  root.dataset.mode = mode;
  root.dataset.theme = theme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'python/parquet';</script>
<script>
// Theme-related settings added to the DOCUMENTATION_OPTIONS object that
// documentation_options.js (loaded above) provides.
Object.assign(DOCUMENTATION_OPTIONS, {
  theme_version: '0.15.2',
  theme_switcher_json_url: '/docs/_static/versions.json',
  theme_switcher_version_match: 'dev/',
  show_version_warning_banner: true
});
</script>
<!-- Canonical URL, favicon and document relations (index, search, next, prev). -->
<link rel="canonical" href="https://arrow.apache.org/docs/python/parquet.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Tabular Datasets" href="dataset.html" />
<link rel="prev" title="Reading JSON files" href="json.html" />
<!-- The viewport meta is declared once, next to the charset at the top of <head>;
     the duplicate that used to sit here has been dropped. -->
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
/* Matomo bootstrap: push tracker commands onto the shared _paq array, then
   insert an async matomo.js script element ahead of the first script. */
window._paq = window._paq || [];
var _paq = window._paq;
/* Tracker methods such as "setCustomDimension" belong before "trackPageView". */
/* Cookie tracking is explicitly disabled to avoid privacy issues. */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '20']);
  var firstScript = document.getElementsByTagName('script')[0];
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = baseUrl + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
  <!-- Decorative arrow: the visible text already names the button, so the icon
       is hidden from assistive technology like the icon-link icons in the navbar. -->
  <i class="fa-solid fa-arrow-up" aria-hidden="true"></i>
  Back to top
</button>
<!-- Checkbox and overlay-label pairs for the primary and secondary sidebar
     toggles; each label is bound to its checkbox through the for attribute. -->
<input type="checkbox" class="sidebar-toggle" name="__primary" id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox" class="sidebar-toggle" name="__secondary" id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<!-- Search overlay: a GET form that submits the query as "q" to search.html. -->
<div class="search-button__wrapper">
  <div class="search-button__overlay"></div>
  <div class="search-button__search-container">
    <!-- role="search" exposes the form as a search landmark. -->
    <form class="bd-search d-flex align-items-center"
          action="../search.html"
          method="get"
          role="search">
      <!-- Decorative icon: the input carries the accessible name via aria-label. -->
      <i class="fa-solid fa-magnifying-glass" aria-hidden="true"></i>
      <input type="search"
             class="form-control"
             name="q"
             id="search-input"
             placeholder="Search the docs ..."
             aria-label="Search the docs ..."
             autocomplete="off"
             autocorrect="off"
             autocapitalize="off"
             spellcheck="false"/>
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
    </form>
  </div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<!-- Icon-only label bound to the #__primary checkbox declared near the top of <body>. -->
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<!-- Brand logo linking to the docs home page. The light-theme image is static
     markup; the dark-theme image is written by script, so it is only added
     when JavaScript runs. -->
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
  <!-- Header navigation: two top-level sections plus the "Implementations"
       dropdown (list id pst-nav-more-links). -->
  <nav class="navbar-nav">
    <ul class="bd-navbar-elements navbar-nav">
      <li class="nav-item">
        <a class="nav-link nav-internal" href="../format/index.html">
          Specifications
        </a>
      </li>
      <li class="nav-item">
        <a class="nav-link nav-internal" href="../developers/index.html">
          Development
        </a>
      </li>
      <li class="nav-item dropdown">
        <button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
          Implementations
        </button>
        <ul id="pst-nav-more-links" class="dropdown-menu">
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
              C/GLib
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
              C++
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
              C#
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
              Go
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../java/index.html">
              Java
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../js/index.html">
              JavaScript
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
              Julia
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
              MATLAB
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
              nanoarrow
            </a>
          </li>
          <li class="nav-item current active">
            <a class="nav-link dropdown-item nav-internal" href="index.html">
              Python
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../r/index.html">
              R
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
              Ruby
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
              Rust
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../status.html">
              Implementation Status
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
              C++ cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
              Java cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
              Python cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
              R cookbook
            </a>
          </li>
        </ul>
      </li>
    </ul>
  </nav>
</div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// The search button is written by script, so it is only added to the page
// when JavaScript runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
// Version switcher dropdown, written by script. The button controls the
// (initially empty) list container #pst-version-switcher-list-2; per the
// inline notes in the markup, the list and the button text are filled in by
// JavaScript later.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Theme switch button, written by script. It carries one icon span per
// data-mode value: light, dark and auto.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item">
  <!-- Icon links to the project's GitHub repository and X account. Each icon
       is hidden from assistive technology and paired with sr-only text. -->
  <ul class="navbar-icon-links navbar-nav" aria-label="Icon Links">
    <li class="nav-item">
      <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
        <span class="sr-only">GitHub</span></a>
    </li>
    <li class="nav-item">
      <a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
        <span class="sr-only">X</span></a>
    </li>
  </ul>
</div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Second copy of the search button, inside the navbar-persistent--mobile
// container. It is written by script, so it is only added when JavaScript runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<!-- Icon-only label bound to the #__secondary checkbox declared near the top of
     <body>; tabindex="0" places the label in the keyboard tab order. -->
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
  <!-- Sidebar copy of the site navigation: two top-level sections plus the
       "Implementations" dropdown (list id pst-nav-more-links-2). -->
  <nav class="navbar-nav">
    <ul class="bd-navbar-elements navbar-nav">
      <li class="nav-item">
        <a class="nav-link nav-internal" href="../format/index.html">
          Specifications
        </a>
      </li>
      <li class="nav-item">
        <a class="nav-link nav-internal" href="../developers/index.html">
          Development
        </a>
      </li>
      <li class="nav-item dropdown">
        <button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
          Implementations
        </button>
        <ul id="pst-nav-more-links-2" class="dropdown-menu">
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
              C/GLib
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
              C++
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
              C#
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
              Go
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../java/index.html">
              Java
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../js/index.html">
              JavaScript
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
              Julia
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
              MATLAB
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
              nanoarrow
            </a>
          </li>
          <li class="nav-item current active">
            <a class="nav-link dropdown-item nav-internal" href="index.html">
              Python
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../r/index.html">
              R
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
              Ruby
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
              Rust
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-internal" href="../status.html">
              Implementation Status
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
              C++ cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
              Java cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
              Python cookbook
            </a>
          </li>
          <li class="nav-item">
            <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
              R cookbook
            </a>
          </li>
        </ul>
      </li>
    </ul>
  </nav>
</div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
// Sidebar copy of the version switcher dropdown, written by script. The
// button controls the (initially empty) list container
// #pst-version-switcher-list-3; per the inline notes in the markup, the list
// and the button text are filled in by JavaScript later.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Sidebar copy of the theme switch button, written by script. It carries one
// icon span per data-mode value: light, dark and auto.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item">
  <!-- Sidebar copy of the icon links to the project's GitHub repository and X
       account. Each icon is hidden from assistive technology and paired with
       sr-only text. -->
  <ul class="navbar-icon-links navbar-nav" aria-label="Icon Links">
    <li class="nav-item">
      <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
        <span class="sr-only">GitHub</span></a>
    </li>
    <li class="nav-item">
      <a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
        <span class="sr-only">X</span></a>
    </li>
  </ul>
</div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="install.html">Installing PyArrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="getstarted.html">Getting Started</a></li>
<li class="toctree-l1"><a class="reference internal" href="data.html">Data Types and In-Memory Data Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="memory.html">Memory and IO Interfaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="ipc.html">Streaming, Serialization, and IPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="filesystems.html">Filesystem Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="numpy.html">NumPy Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="pandas.html">Pandas Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="interchange_protocol.html">Dataframe Interchange Protocol</a></li>
<li class="toctree-l1"><a class="reference internal" href="dlpack.html">The DLPack Protocol</a></li>
<li class="toctree-l1"><a class="reference internal" href="timestamps.html">Timestamps</a></li>
<li class="toctree-l1"><a class="reference internal" href="orc.html">Reading and Writing the Apache ORC Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l1"><a class="reference internal" href="feather.html">Feather File Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="json.html">Reading JSON files</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Reading and Writing the Apache Parquet Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataset.html">Tabular Datasets</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending_types.html">Extending pyarrow</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="integration.html">PyArrow Integrations</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="integration/python_r.html">Integrating PyArrow with R</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/python_java.html">Integrating PyArrow with Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/extending.html">Using pyarrow from C++ and Cython Code</a></li>
<li class="toctree-l2"><a class="reference internal" href="integration/cuda.html">CUDA Integration</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="env_vars.html">Environment Variables</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/datatypes.html">Data Types and Schemas</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.null.html">pyarrow.null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.bool_.html">pyarrow.bool_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int8.html">pyarrow.int8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int16.html">pyarrow.int16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int32.html">pyarrow.int32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.int64.html">pyarrow.int64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint8.html">pyarrow.uint8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint16.html">pyarrow.uint16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint32.html">pyarrow.uint32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.uint64.html">pyarrow.uint64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float16.html">pyarrow.float16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float32.html">pyarrow.float32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.float64.html">pyarrow.float64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.time32.html">pyarrow.time32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.time64.html">pyarrow.time64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.timestamp.html">pyarrow.timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.date32.html">pyarrow.date32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.date64.html">pyarrow.date64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.duration.html">pyarrow.duration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.month_day_nano_interval.html">pyarrow.month_day_nano_interval</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.binary.html">pyarrow.binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.string.html">pyarrow.string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.utf8.html">pyarrow.utf8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_binary.html">pyarrow.large_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_string.html">pyarrow.large_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_utf8.html">pyarrow.large_utf8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.binary_view.html">pyarrow.binary_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.string_view.html">pyarrow.string_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.decimal128.html">pyarrow.decimal128</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.list_.html">pyarrow.list_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_list.html">pyarrow.large_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.list_view.html">pyarrow.list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.large_list_view.html">pyarrow.large_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.map_.html">pyarrow.map_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.struct.html">pyarrow.struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dictionary.html">pyarrow.dictionary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.run_end_encoded.html">pyarrow.run_end_encoded</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.field.html">pyarrow.field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.schema.html">pyarrow.schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.from_numpy_dtype.html">pyarrow.from_numpy_dtype</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.unify_schemas.html">pyarrow.unify_schemas</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DataType.html">pyarrow.DataType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryType.html">pyarrow.DictionaryType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListType.html">pyarrow.ListType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapType.html">pyarrow.MapType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructType.html">pyarrow.StructType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionType.html">pyarrow.UnionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampType.html">pyarrow.TimestampType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Type.html">pyarrow.Time32Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Type.html">pyarrow.Time64Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryType.html">pyarrow.FixedSizeBinaryType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Type.html">pyarrow.Decimal128Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Field.html">pyarrow.Field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Schema.html">pyarrow.Schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedType.html">pyarrow.RunEndEncodedType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionType.html">pyarrow.ExtensionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.PyExtensionType.html">pyarrow.PyExtensionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.register_extension_type.html">pyarrow.register_extension_type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.unregister_extension_type.html">pyarrow.unregister_extension_type</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_boolean.html">pyarrow.types.is_boolean</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_integer.html">pyarrow.types.is_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_signed_integer.html">pyarrow.types.is_signed_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_unsigned_integer.html">pyarrow.types.is_unsigned_integer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int8.html">pyarrow.types.is_int8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int16.html">pyarrow.types.is_int16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int32.html">pyarrow.types.is_int32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_int64.html">pyarrow.types.is_int64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint8.html">pyarrow.types.is_uint8</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint16.html">pyarrow.types.is_uint16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint32.html">pyarrow.types.is_uint32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_uint64.html">pyarrow.types.is_uint64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_floating.html">pyarrow.types.is_floating</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float16.html">pyarrow.types.is_float16</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float32.html">pyarrow.types.is_float32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_float64.html">pyarrow.types.is_float64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal.html">pyarrow.types.is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal128.html">pyarrow.types.is_decimal128</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_decimal256.html">pyarrow.types.is_decimal256</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_list.html">pyarrow.types.is_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_list.html">pyarrow.types.is_large_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_fixed_size_list.html">pyarrow.types.is_fixed_size_list</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_list_view.html">pyarrow.types.is_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_list_view.html">pyarrow.types.is_large_list_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_struct.html">pyarrow.types.is_struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_union.html">pyarrow.types.is_union</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_nested.html">pyarrow.types.is_nested</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_run_end_encoded.html">pyarrow.types.is_run_end_encoded</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_temporal.html">pyarrow.types.is_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_timestamp.html">pyarrow.types.is_timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date.html">pyarrow.types.is_date</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date32.html">pyarrow.types.is_date32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_date64.html">pyarrow.types.is_date64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time.html">pyarrow.types.is_time</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time32.html">pyarrow.types.is_time32</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_time64.html">pyarrow.types.is_time64</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_duration.html">pyarrow.types.is_duration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_interval.html">pyarrow.types.is_interval</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_null.html">pyarrow.types.is_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_binary.html">pyarrow.types.is_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_unicode.html">pyarrow.types.is_unicode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_string.html">pyarrow.types.is_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_binary.html">pyarrow.types.is_large_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_unicode.html">pyarrow.types.is_large_unicode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_large_string.html">pyarrow.types.is_large_string</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_binary_view.html">pyarrow.types.is_binary_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_string_view.html">pyarrow.types.is_string_view</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_fixed_size_binary.html">pyarrow.types.is_fixed_size_binary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_map.html">pyarrow.types.is_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_dictionary.html">pyarrow.types.is_dictionary</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.types.is_primitive.html">pyarrow.types.is_primitive</a></li>
</ul>
</li>
<!-- Table-of-contents entry for api/arrays.html. The checkbox/label pair is the
     toggle control for the nested list of generated pages (the show/hide
     behaviour itself presumably lives in the theme CSS, which is not part of
     this file). The chevron icon is decorative, so it is hidden from assistive
     technology and the checkbox carries the accessible name instead. -->
<li class="toctree-l2 has-children"><a class="reference internal" href="api/arrays.html">Arrays and Scalars</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox" aria-label="Toggle Arrays and Scalars submenu"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down" aria-hidden="true"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.array.html">pyarrow.array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.nulls.html">pyarrow.nulls</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Array.html">pyarrow.Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BooleanArray.html">pyarrow.BooleanArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatingPointArray.html">pyarrow.FloatingPointArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.IntegerArray.html">pyarrow.IntegerArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int8Array.html">pyarrow.Int8Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int16Array.html">pyarrow.Int16Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int32Array.html">pyarrow.Int32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int64Array.html">pyarrow.Int64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NullArray.html">pyarrow.NullArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NumericArray.html">pyarrow.NumericArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt8Array.html">pyarrow.UInt8Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt16Array.html">pyarrow.UInt16Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt32Array.html">pyarrow.UInt32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt64Array.html">pyarrow.UInt64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.HalfFloatArray.html">pyarrow.HalfFloatArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatArray.html">pyarrow.FloatArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DoubleArray.html">pyarrow.DoubleArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryArray.html">pyarrow.BinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringArray.html">pyarrow.StringArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryArray.html">pyarrow.FixedSizeBinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeBinaryArray.html">pyarrow.LargeBinaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeStringArray.html">pyarrow.LargeStringArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Array.html">pyarrow.Time32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Array.html">pyarrow.Time64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date32Array.html">pyarrow.Date32Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date64Array.html">pyarrow.Date64Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampArray.html">pyarrow.TimestampArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DurationArray.html">pyarrow.DurationArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MonthDayNanoIntervalArray.html">pyarrow.MonthDayNanoIntervalArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Array.html">pyarrow.Decimal128Array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryArray.html">pyarrow.DictionaryArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListArray.html">pyarrow.ListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeListArray.html">pyarrow.FixedSizeListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListArray.html">pyarrow.LargeListArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListViewArray.html">pyarrow.ListViewArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListViewArray.html">pyarrow.LargeListViewArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapArray.html">pyarrow.MapArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedArray.html">pyarrow.RunEndEncodedArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructArray.html">pyarrow.StructArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionArray.html">pyarrow.UnionArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionArray.html">pyarrow.ExtensionArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedShapeTensorArray.html">pyarrow.FixedShapeTensorArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.scalar.html">pyarrow.scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NA.html">pyarrow.NA</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Scalar.html">pyarrow.Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BooleanScalar.html">pyarrow.BooleanScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int8Scalar.html">pyarrow.Int8Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int16Scalar.html">pyarrow.Int16Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int32Scalar.html">pyarrow.Int32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Int64Scalar.html">pyarrow.Int64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt8Scalar.html">pyarrow.UInt8Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt16Scalar.html">pyarrow.UInt16Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt32Scalar.html">pyarrow.UInt32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UInt64Scalar.html">pyarrow.UInt64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.HalfFloatScalar.html">pyarrow.HalfFloatScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FloatScalar.html">pyarrow.FloatScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DoubleScalar.html">pyarrow.DoubleScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryScalar.html">pyarrow.BinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringScalar.html">pyarrow.StringScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBinaryScalar.html">pyarrow.FixedSizeBinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeBinaryScalar.html">pyarrow.LargeBinaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeStringScalar.html">pyarrow.LargeStringScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BinaryViewScalar.html">pyarrow.BinaryViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StringViewScalar.html">pyarrow.StringViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time32Scalar.html">pyarrow.Time32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Time64Scalar.html">pyarrow.Time64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date32Scalar.html">pyarrow.Date32Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Date64Scalar.html">pyarrow.Date64Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TimestampScalar.html">pyarrow.TimestampScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DurationScalar.html">pyarrow.DurationScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MonthDayNanoIntervalScalar.html">pyarrow.MonthDayNanoIntervalScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Decimal128Scalar.html">pyarrow.Decimal128Scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.DictionaryScalar.html">pyarrow.DictionaryScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RunEndEncodedScalar.html">pyarrow.RunEndEncodedScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListScalar.html">pyarrow.ListScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListScalar.html">pyarrow.LargeListScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ListViewScalar.html">pyarrow.ListViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.LargeListViewScalar.html">pyarrow.LargeListViewScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MapScalar.html">pyarrow.MapScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.StructScalar.html">pyarrow.StructScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.UnionScalar.html">pyarrow.UnionScalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ExtensionScalar.html">pyarrow.ExtensionScalar</a></li>
</ul>
</li>
<!-- Table-of-contents entry for api/memory.html. The checkbox/label pair is the
     toggle control for the nested list of generated pages (the show/hide
     behaviour itself presumably lives in the theme CSS, which is not part of
     this file). The chevron icon is decorative, so it is hidden from assistive
     technology and the checkbox carries the accessible name instead. -->
<li class="toctree-l2 has-children"><a class="reference internal" href="api/memory.html">Buffers and Memory</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox" aria-label="Toggle Buffers and Memory submenu"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down" aria-hidden="true"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.allocate_buffer.html">pyarrow.allocate_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.py_buffer.html">pyarrow.py_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.foreign_buffer.html">pyarrow.foreign_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Buffer.html">pyarrow.Buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ResizableBuffer.html">pyarrow.ResizableBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Codec.html">pyarrow.Codec</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compress.html">pyarrow.compress</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.decompress.html">pyarrow.decompress</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MemoryPool.html">pyarrow.MemoryPool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.default_memory_pool.html">pyarrow.default_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.jemalloc_memory_pool.html">pyarrow.jemalloc_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.mimalloc_memory_pool.html">pyarrow.mimalloc_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.system_memory_pool.html">pyarrow.system_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.jemalloc_set_decay_ms.html">pyarrow.jemalloc_set_decay_ms</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_memory_pool.html">pyarrow.set_memory_pool</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.log_memory_allocations.html">pyarrow.log_memory_allocations</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.total_allocated_bytes.html">pyarrow.total_allocated_bytes</a></li>
</ul>
</li>
<!-- Table-of-contents entry for api/tables.html. The checkbox/label pair is the
     toggle control for the nested list of generated pages (the show/hide
     behaviour itself presumably lives in the theme CSS, which is not part of
     this file). The chevron icon is decorative, so it is hidden from assistive
     technology and the checkbox carries the accessible name instead. -->
<li class="toctree-l2 has-children"><a class="reference internal" href="api/tables.html">Tables and Tensors</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox" aria-label="Toggle Tables and Tensors submenu"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down" aria-hidden="true"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.chunked_array.html">pyarrow.chunked_array</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.concat_arrays.html">pyarrow.concat_arrays</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.concat_tables.html">pyarrow.concat_tables</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.record_batch.html">pyarrow.record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.table.html">pyarrow.table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ChunkedArray.html">pyarrow.ChunkedArray</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RecordBatch.html">pyarrow.RecordBatch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Table.html">pyarrow.Table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.TableGroupBy.html">pyarrow.TableGroupBy</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.RecordBatchReader.html">pyarrow.RecordBatchReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.interchange.from_dataframe.html">pyarrow.interchange.from_dataframe</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.Tensor.html">pyarrow.Tensor</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/compute.html">Compute Functions</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-7"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.all.html">pyarrow.compute.all</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.any.html">pyarrow.compute.any</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.approximate_median.html">pyarrow.compute.approximate_median</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count.html">pyarrow.compute.count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_distinct.html">pyarrow.compute.count_distinct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.index.html">pyarrow.compute.index</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.max.html">pyarrow.compute.max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.mean.html">pyarrow.compute.mean</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min.html">pyarrow.compute.min</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min_max.html">pyarrow.compute.min_max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.mode.html">pyarrow.compute.mode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.product.html">pyarrow.compute.product</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quantile.html">pyarrow.compute.quantile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.stddev.html">pyarrow.compute.stddev</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sum.html">pyarrow.compute.sum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tdigest.html">pyarrow.compute.tdigest</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.variance.html">pyarrow.compute.variance</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_sum.html">pyarrow.compute.cumulative_sum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_sum_checked.html">pyarrow.compute.cumulative_sum_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_prod.html">pyarrow.compute.cumulative_prod</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_prod_checked.html">pyarrow.compute.cumulative_prod_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_max.html">pyarrow.compute.cumulative_max</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cumulative_min.html">pyarrow.compute.cumulative_min</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.abs.html">pyarrow.compute.abs</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.abs_checked.html">pyarrow.compute.abs_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.add.html">pyarrow.compute.add</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.add_checked.html">pyarrow.compute.add_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.divide.html">pyarrow.compute.divide</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.divide_checked.html">pyarrow.compute.divide_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.multiply.html">pyarrow.compute.multiply</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.multiply_checked.html">pyarrow.compute.multiply_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.negate.html">pyarrow.compute.negate</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.negate_checked.html">pyarrow.compute.negate_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.power.html">pyarrow.compute.power</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.power_checked.html">pyarrow.compute.power_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sign.html">pyarrow.compute.sign</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sqrt.html">pyarrow.compute.sqrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sqrt_checked.html">pyarrow.compute.sqrt_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subtract.html">pyarrow.compute.subtract</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subtract_checked.html">pyarrow.compute.subtract_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_and.html">pyarrow.compute.bit_wise_and</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_not.html">pyarrow.compute.bit_wise_not</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_or.html">pyarrow.compute.bit_wise_or</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.bit_wise_xor.html">pyarrow.compute.bit_wise_xor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_left.html">pyarrow.compute.shift_left</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_left_checked.html">pyarrow.compute.shift_left_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_right.html">pyarrow.compute.shift_right</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.shift_right_checked.html">pyarrow.compute.shift_right_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ceil.html">pyarrow.compute.ceil</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.floor.html">pyarrow.compute.floor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round.html">pyarrow.compute.round</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round_to_multiple.html">pyarrow.compute.round_to_multiple</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.trunc.html">pyarrow.compute.trunc</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ln.html">pyarrow.compute.ln</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ln_checked.html">pyarrow.compute.ln_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log10.html">pyarrow.compute.log10</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log10_checked.html">pyarrow.compute.log10_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log1p.html">pyarrow.compute.log1p</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log1p_checked.html">pyarrow.compute.log1p_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log2.html">pyarrow.compute.log2</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.log2_checked.html">pyarrow.compute.log2_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.logb.html">pyarrow.compute.logb</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.logb_checked.html">pyarrow.compute.logb_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.acos.html">pyarrow.compute.acos</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.acos_checked.html">pyarrow.compute.acos_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.asin.html">pyarrow.compute.asin</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.asin_checked.html">pyarrow.compute.asin_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.atan.html">pyarrow.compute.atan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.atan2.html">pyarrow.compute.atan2</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cos.html">pyarrow.compute.cos</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cos_checked.html">pyarrow.compute.cos_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sin.html">pyarrow.compute.sin</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sin_checked.html">pyarrow.compute.sin_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tan.html">pyarrow.compute.tan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.tan_checked.html">pyarrow.compute.tan_checked</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.equal.html">pyarrow.compute.equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.greater.html">pyarrow.compute.greater</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.greater_equal.html">pyarrow.compute.greater_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.less.html">pyarrow.compute.less</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.less_equal.html">pyarrow.compute.less_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.not_equal.html">pyarrow.compute.not_equal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.max_element_wise.html">pyarrow.compute.max_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.min_element_wise.html">pyarrow.compute.min_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_.html">pyarrow.compute.and_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_kleene.html">pyarrow.compute.and_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_not.html">pyarrow.compute.and_not</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.and_not_kleene.html">pyarrow.compute.and_not_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.invert.html">pyarrow.compute.invert</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.or_.html">pyarrow.compute.or_</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.or_kleene.html">pyarrow.compute.or_kleene</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.xor.html">pyarrow.compute.xor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_alnum.html">pyarrow.compute.ascii_is_alnum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_alpha.html">pyarrow.compute.ascii_is_alpha</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_decimal.html">pyarrow.compute.ascii_is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_lower.html">pyarrow.compute.ascii_is_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_printable.html">pyarrow.compute.ascii_is_printable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_space.html">pyarrow.compute.ascii_is_space</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_upper.html">pyarrow.compute.ascii_is_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_alnum.html">pyarrow.compute.utf8_is_alnum</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_alpha.html">pyarrow.compute.utf8_is_alpha</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_decimal.html">pyarrow.compute.utf8_is_decimal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_digit.html">pyarrow.compute.utf8_is_digit</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_lower.html">pyarrow.compute.utf8_is_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_numeric.html">pyarrow.compute.utf8_is_numeric</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_printable.html">pyarrow.compute.utf8_is_printable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_space.html">pyarrow.compute.utf8_is_space</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_upper.html">pyarrow.compute.utf8_is_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_is_title.html">pyarrow.compute.ascii_is_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_is_title.html">pyarrow.compute.utf8_is_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.string_is_ascii.html">pyarrow.compute.string_is_ascii</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_capitalize.html">pyarrow.compute.ascii_capitalize</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_lower.html">pyarrow.compute.ascii_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_reverse.html">pyarrow.compute.ascii_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_swapcase.html">pyarrow.compute.ascii_swapcase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_title.html">pyarrow.compute.ascii_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_upper.html">pyarrow.compute.ascii_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_length.html">pyarrow.compute.binary_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_repeat.html">pyarrow.compute.binary_repeat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_replace_slice.html">pyarrow.compute.binary_replace_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_reverse.html">pyarrow.compute.binary_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_substring.html">pyarrow.compute.replace_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_substring_regex.html">pyarrow.compute.replace_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_capitalize.html">pyarrow.compute.utf8_capitalize</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_length.html">pyarrow.compute.utf8_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_lower.html">pyarrow.compute.utf8_lower</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_replace_slice.html">pyarrow.compute.utf8_replace_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_reverse.html">pyarrow.compute.utf8_reverse</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_swapcase.html">pyarrow.compute.utf8_swapcase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_title.html">pyarrow.compute.utf8_title</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_upper.html">pyarrow.compute.utf8_upper</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_center.html">pyarrow.compute.ascii_center</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_lpad.html">pyarrow.compute.ascii_lpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rpad.html">pyarrow.compute.ascii_rpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_center.html">pyarrow.compute.utf8_center</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_lpad.html">pyarrow.compute.utf8_lpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rpad.html">pyarrow.compute.utf8_rpad</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_ltrim.html">pyarrow.compute.ascii_ltrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_ltrim_whitespace.html">pyarrow.compute.ascii_ltrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rtrim.html">pyarrow.compute.ascii_rtrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_rtrim_whitespace.html">pyarrow.compute.ascii_rtrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_trim.html">pyarrow.compute.ascii_trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_trim_whitespace.html">pyarrow.compute.ascii_trim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_ltrim.html">pyarrow.compute.utf8_ltrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_ltrim_whitespace.html">pyarrow.compute.utf8_ltrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rtrim.html">pyarrow.compute.utf8_rtrim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_rtrim_whitespace.html">pyarrow.compute.utf8_rtrim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_trim.html">pyarrow.compute.utf8_trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_trim_whitespace.html">pyarrow.compute.utf8_trim_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ascii_split_whitespace.html">pyarrow.compute.ascii_split_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.split_pattern.html">pyarrow.compute.split_pattern</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.split_pattern_regex.html">pyarrow.compute.split_pattern_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_split_whitespace.html">pyarrow.compute.utf8_split_whitespace</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.extract_regex.html">pyarrow.compute.extract_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_join.html">pyarrow.compute.binary_join</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_join_element_wise.html">pyarrow.compute.binary_join_element_wise</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.binary_slice.html">pyarrow.compute.binary_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.utf8_slice_codeunits.html">pyarrow.compute.utf8_slice_codeunits</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_substring.html">pyarrow.compute.count_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.count_substring_regex.html">pyarrow.compute.count_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ends_with.html">pyarrow.compute.ends_with</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.find_substring.html">pyarrow.compute.find_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.find_substring_regex.html">pyarrow.compute.find_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.index_in.html">pyarrow.compute.index_in</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_in.html">pyarrow.compute.is_in</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_like.html">pyarrow.compute.match_like</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_substring.html">pyarrow.compute.match_substring</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.match_substring_regex.html">pyarrow.compute.match_substring_regex</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.starts_with.html">pyarrow.compute.starts_with</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.indices_nonzero.html">pyarrow.compute.indices_nonzero</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_finite.html">pyarrow.compute.is_finite</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_inf.html">pyarrow.compute.is_inf</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_nan.html">pyarrow.compute.is_nan</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_null.html">pyarrow.compute.is_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_valid.html">pyarrow.compute.is_valid</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.true_unless_null.html">pyarrow.compute.true_unless_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.case_when.html">pyarrow.compute.case_when</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.choose.html">pyarrow.compute.choose</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.coalesce.html">pyarrow.compute.coalesce</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.if_else.html">pyarrow.compute.if_else</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.cast.html">pyarrow.compute.cast</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ceil_temporal.html">pyarrow.compute.ceil_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.floor_temporal.html">pyarrow.compute.floor_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.round_temporal.html">pyarrow.compute.round_temporal</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.run_end_decode.html">pyarrow.compute.run_end_decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.run_end_encode.html">pyarrow.compute.run_end_encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.strftime.html">pyarrow.compute.strftime</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.strptime.html">pyarrow.compute.strptime</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day.html">pyarrow.compute.day</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_of_week.html">pyarrow.compute.day_of_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_of_year.html">pyarrow.compute.day_of_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.hour.html">pyarrow.compute.hour</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_week.html">pyarrow.compute.iso_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_year.html">pyarrow.compute.iso_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.iso_calendar.html">pyarrow.compute.iso_calendar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.is_leap_year.html">pyarrow.compute.is_leap_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.microsecond.html">pyarrow.compute.microsecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.millisecond.html">pyarrow.compute.millisecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.minute.html">pyarrow.compute.minute</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month.html">pyarrow.compute.month</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.nanosecond.html">pyarrow.compute.nanosecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quarter.html">pyarrow.compute.quarter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.second.html">pyarrow.compute.second</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.subsecond.html">pyarrow.compute.subsecond</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.us_week.html">pyarrow.compute.us_week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.us_year.html">pyarrow.compute.us_year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.week.html">pyarrow.compute.week</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.year.html">pyarrow.compute.year</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.year_month_day.html">pyarrow.compute.year_month_day</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.day_time_interval_between.html">pyarrow.compute.day_time_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.days_between.html">pyarrow.compute.days_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.hours_between.html">pyarrow.compute.hours_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.microseconds_between.html">pyarrow.compute.microseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.milliseconds_between.html">pyarrow.compute.milliseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.minutes_between.html">pyarrow.compute.minutes_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month_day_nano_interval_between.html">pyarrow.compute.month_day_nano_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.month_interval_between.html">pyarrow.compute.month_interval_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.nanoseconds_between.html">pyarrow.compute.nanoseconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.quarters_between.html">pyarrow.compute.quarters_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.seconds_between.html">pyarrow.compute.seconds_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.weeks_between.html">pyarrow.compute.weeks_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.years_between.html">pyarrow.compute.years_between</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.assume_timezone.html">pyarrow.compute.assume_timezone</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.local_timestamp.html">pyarrow.compute.local_timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.dictionary_encode.html">pyarrow.compute.dictionary_encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.unique.html">pyarrow.compute.unique</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.value_counts.html">pyarrow.compute.value_counts</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_filter.html">pyarrow.compute.array_filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_take.html">pyarrow.compute.array_take</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.drop_null.html">pyarrow.compute.drop_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.filter.html">pyarrow.compute.filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.take.html">pyarrow.compute.take</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.array_sort_indices.html">pyarrow.compute.array_sort_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.partition_nth_indices.html">pyarrow.compute.partition_nth_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.select_k_unstable.html">pyarrow.compute.select_k_unstable</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.sort_indices.html">pyarrow.compute.sort_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null.html">pyarrow.compute.fill_null</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null_backward.html">pyarrow.compute.fill_null_backward</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.fill_null_forward.html">pyarrow.compute.fill_null_forward</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_element.html">pyarrow.compute.list_element</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_flatten.html">pyarrow.compute.list_flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_parent_indices.html">pyarrow.compute.list_parent_indices</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_slice.html">pyarrow.compute.list_slice</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.list_value_length.html">pyarrow.compute.list_value_length</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.make_struct.html">pyarrow.compute.make_struct</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.map_lookup.html">pyarrow.compute.map_lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.replace_with_mask.html">pyarrow.compute.replace_with_mask</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.struct_field.html">pyarrow.compute.struct_field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.pairwise_diff.html">pyarrow.compute.pairwise_diff</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ArraySortOptions.html">pyarrow.compute.ArraySortOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.AssumeTimezoneOptions.html">pyarrow.compute.AssumeTimezoneOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CastOptions.html">pyarrow.compute.CastOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CountOptions.html">pyarrow.compute.CountOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.CumulativeSumOptions.html">pyarrow.compute.CumulativeSumOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.DayOfWeekOptions.html">pyarrow.compute.DayOfWeekOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.DictionaryEncodeOptions.html">pyarrow.compute.DictionaryEncodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ElementWiseAggregateOptions.html">pyarrow.compute.ElementWiseAggregateOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ExtractRegexOptions.html">pyarrow.compute.ExtractRegexOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.FilterOptions.html">pyarrow.compute.FilterOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.IndexOptions.html">pyarrow.compute.IndexOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.JoinOptions.html">pyarrow.compute.JoinOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ListSliceOptions.html">pyarrow.compute.ListSliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MakeStructOptions.html">pyarrow.compute.MakeStructOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MapLookupOptions.html">pyarrow.compute.MapLookupOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.MatchSubstringOptions.html">pyarrow.compute.MatchSubstringOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ModeOptions.html">pyarrow.compute.ModeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.NullOptions.html">pyarrow.compute.NullOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PadOptions.html">pyarrow.compute.PadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PairwiseOptions.html">pyarrow.compute.PairwiseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.PartitionNthOptions.html">pyarrow.compute.PartitionNthOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.QuantileOptions.html">pyarrow.compute.QuantileOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ReplaceSliceOptions.html">pyarrow.compute.ReplaceSliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ReplaceSubstringOptions.html">pyarrow.compute.ReplaceSubstringOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundOptions.html">pyarrow.compute.RoundOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundTemporalOptions.html">pyarrow.compute.RoundTemporalOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RoundToMultipleOptions.html">pyarrow.compute.RoundToMultipleOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.RunEndEncodeOptions.html">pyarrow.compute.RunEndEncodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.ScalarAggregateOptions.html">pyarrow.compute.ScalarAggregateOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SelectKOptions.html">pyarrow.compute.SelectKOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SetLookupOptions.html">pyarrow.compute.SetLookupOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SliceOptions.html">pyarrow.compute.SliceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SortOptions.html">pyarrow.compute.SortOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SplitOptions.html">pyarrow.compute.SplitOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.SplitPatternOptions.html">pyarrow.compute.SplitPatternOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StrftimeOptions.html">pyarrow.compute.StrftimeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StrptimeOptions.html">pyarrow.compute.StrptimeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.StructFieldOptions.html">pyarrow.compute.StructFieldOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TakeOptions.html">pyarrow.compute.TakeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TDigestOptions.html">pyarrow.compute.TDigestOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.TrimOptions.html">pyarrow.compute.TrimOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.VarianceOptions.html">pyarrow.compute.VarianceOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.WeekOptions.html">pyarrow.compute.WeekOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.register_scalar_function.html">pyarrow.compute.register_scalar_function</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.compute.UdfContext.html">pyarrow.compute.UdfContext</a></li>
</ul>
</li>
<!-- Sidebar section "Acero - Streaming Execution Engine": a link to the section page, a checkbox/label pair used as the toggle for the nested list (the chevron icon is decorative, so it is hidden from assistive technology), then one entry per API page. -->
<li class="toctree-l2 has-children"><a class="reference internal" href="api/acero.html">Acero - Streaming Execution Engine</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-8"><i class="fa-solid fa-chevron-down" aria-hidden="true"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.Declaration.html">pyarrow.acero.Declaration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ExecNodeOptions.html">pyarrow.acero.ExecNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.TableSourceNodeOptions.html">pyarrow.acero.TableSourceNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ScanNodeOptions.html">pyarrow.acero.ScanNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.FilterNodeOptions.html">pyarrow.acero.FilterNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.ProjectNodeOptions.html">pyarrow.acero.ProjectNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.AggregateNodeOptions.html">pyarrow.acero.AggregateNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.OrderByNodeOptions.html">pyarrow.acero.OrderByNodeOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.acero.HashJoinNodeOptions.html">pyarrow.acero.HashJoinNodeOptions</a></li>
</ul>
</li>
<!-- Sidebar section "Substrait": a link to the section page, a checkbox/label pair used as the toggle for the nested list (the chevron icon is decorative, so it is hidden from assistive technology), then one entry per API page. -->
<li class="toctree-l2 has-children"><a class="reference internal" href="api/substrait.html">Substrait</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-9"><i class="fa-solid fa-chevron-down" aria-hidden="true"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.run_query.html">pyarrow.substrait.run_query</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.BoundExpressions.html">pyarrow.substrait.BoundExpressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.deserialize_expressions.html">pyarrow.substrait.deserialize_expressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.serialize_expressions.html">pyarrow.substrait.serialize_expressions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.substrait.get_supported_functions.html">pyarrow.substrait.get_supported_functions</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/files.html">Streams and File Access</a><input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-10"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.input_stream.html">pyarrow.input_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.output_stream.html">pyarrow.output_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.memory_map.html">pyarrow.memory_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.create_memory_map.html">pyarrow.create_memory_map</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.NativeFile.html">pyarrow.NativeFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.OSFile.html">pyarrow.OSFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.PythonFile.html">pyarrow.PythonFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BufferReader.html">pyarrow.BufferReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.BufferOutputStream.html">pyarrow.BufferOutputStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.FixedSizeBufferWriter.html">pyarrow.FixedSizeBufferWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.MemoryMappedFile.html">pyarrow.MemoryMappedFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.CompressedInputStream.html">pyarrow.CompressedInputStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.CompressedOutputStream.html">pyarrow.CompressedOutputStream</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/ipc.html">Serialization and IPC</a><input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-11"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.new_file.html">pyarrow.ipc.new_file</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.open_file.html">pyarrow.ipc.open_file</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.new_stream.html">pyarrow.ipc.new_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.open_stream.html">pyarrow.ipc.open_stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_message.html">pyarrow.ipc.read_message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_record_batch.html">pyarrow.ipc.read_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.get_record_batch_size.html">pyarrow.ipc.get_record_batch_size</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.read_tensor.html">pyarrow.ipc.read_tensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.write_tensor.html">pyarrow.ipc.write_tensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.get_tensor_size.html">pyarrow.ipc.get_tensor_size</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.IpcReadOptions.html">pyarrow.ipc.IpcReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.IpcWriteOptions.html">pyarrow.ipc.IpcWriteOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.Message.html">pyarrow.ipc.Message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.MessageReader.html">pyarrow.ipc.MessageReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchFileReader.html">pyarrow.ipc.RecordBatchFileReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchFileWriter.html">pyarrow.ipc.RecordBatchFileWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchStreamReader.html">pyarrow.ipc.RecordBatchStreamReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.ipc.RecordBatchStreamWriter.html">pyarrow.ipc.RecordBatchStreamWriter</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/flight.html">Arrow Flight</a><input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-12"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Action.html">pyarrow.flight.Action</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ActionType.html">pyarrow.flight.ActionType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.DescriptorType.html">pyarrow.flight.DescriptorType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightDescriptor.html">pyarrow.flight.FlightDescriptor</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightEndpoint.html">pyarrow.flight.FlightEndpoint</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightInfo.html">pyarrow.flight.FlightInfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Location.html">pyarrow.flight.Location</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.MetadataRecordBatchReader.html">pyarrow.flight.MetadataRecordBatchReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.MetadataRecordBatchWriter.html">pyarrow.flight.MetadataRecordBatchWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Ticket.html">pyarrow.flight.Ticket</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.Result.html">pyarrow.flight.Result</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.connect.html">pyarrow.flight.connect</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightCallOptions.html">pyarrow.flight.FlightCallOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightClient.html">pyarrow.flight.FlightClient</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightStreamReader.html">pyarrow.flight.FlightStreamReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightStreamWriter.html">pyarrow.flight.FlightStreamWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientMiddlewareFactory.html">pyarrow.flight.ClientMiddlewareFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientMiddleware.html">pyarrow.flight.ClientMiddleware</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightDataStream.html">pyarrow.flight.FlightDataStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightMetadataWriter.html">pyarrow.flight.FlightMetadataWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightServerBase.html">pyarrow.flight.FlightServerBase</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.GeneratorStream.html">pyarrow.flight.GeneratorStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.RecordBatchStream.html">pyarrow.flight.RecordBatchStream</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerCallContext.html">pyarrow.flight.ServerCallContext</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerMiddlewareFactory.html">pyarrow.flight.ServerMiddlewareFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerMiddleware.html">pyarrow.flight.ServerMiddleware</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ClientAuthHandler.html">pyarrow.flight.ClientAuthHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.ServerAuthHandler.html">pyarrow.flight.ServerAuthHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightError.html">pyarrow.flight.FlightError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightCancelledError.html">pyarrow.flight.FlightCancelledError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightInternalError.html">pyarrow.flight.FlightInternalError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightServerError.html">pyarrow.flight.FlightServerError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightTimedOutError.html">pyarrow.flight.FlightTimedOutError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnauthenticatedError.html">pyarrow.flight.FlightUnauthenticatedError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnauthorizedError.html">pyarrow.flight.FlightUnauthorizedError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightUnavailableError.html">pyarrow.flight.FlightUnavailableError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightWriteSizeExceededError.html">pyarrow.flight.FlightWriteSizeExceededError</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.FlightMethod.html">pyarrow.flight.FlightMethod</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.flight.CallInfo.html">pyarrow.flight.CallInfo</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/formats.html">Tabular File Formats</a><input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-13"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ConvertOptions.html">pyarrow.csv.ConvertOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.CSVStreamingReader.html">pyarrow.csv.CSVStreamingReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.CSVWriter.html">pyarrow.csv.CSVWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ISO8601.html">pyarrow.csv.ISO8601</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ParseOptions.html">pyarrow.csv.ParseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.ReadOptions.html">pyarrow.csv.ReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.WriteOptions.html">pyarrow.csv.WriteOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.open_csv.html">pyarrow.csv.open_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.read_csv.html">pyarrow.csv.read_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.write_csv.html">pyarrow.csv.write_csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.csv.InvalidRow.html">pyarrow.csv.InvalidRow</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.read_feather.html">pyarrow.feather.read_feather</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.read_table.html">pyarrow.feather.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.feather.write_feather.html">pyarrow.feather.write_feather</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.ReadOptions.html">pyarrow.json.ReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.ParseOptions.html">pyarrow.json.ParseOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.json.read_json.html">pyarrow.json.read_json</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetDataset.html">pyarrow.parquet.ParquetDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetFile.html">pyarrow.parquet.ParquetFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetWriter.html">pyarrow.parquet.ParquetWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_table.html">pyarrow.parquet.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_metadata.html">pyarrow.parquet.read_metadata</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_pandas.html">pyarrow.parquet.read_pandas</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.read_schema.html">pyarrow.parquet.read_schema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_metadata.html">pyarrow.parquet.write_metadata</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_table.html">pyarrow.parquet.write_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.write_to_dataset.html">pyarrow.parquet.write_to_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.FileMetaData.html">pyarrow.parquet.FileMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.RowGroupMetaData.html">pyarrow.parquet.RowGroupMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.SortingColumn.html">pyarrow.parquet.SortingColumn</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ColumnChunkMetaData.html">pyarrow.parquet.ColumnChunkMetaData</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.Statistics.html">pyarrow.parquet.Statistics</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetSchema.html">pyarrow.parquet.ParquetSchema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ColumnSchema.html">pyarrow.parquet.ColumnSchema</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.ParquetLogicalType.html">pyarrow.parquet.ParquetLogicalType</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.CryptoFactory.html">pyarrow.parquet.encryption.CryptoFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsClient.html">pyarrow.parquet.encryption.KmsClient</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsConnectionConfig.html">pyarrow.parquet.encryption.KmsConnectionConfig</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.EncryptionConfiguration.html">pyarrow.parquet.encryption.EncryptionConfiguration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.parquet.encryption.DecryptionConfiguration.html">pyarrow.parquet.encryption.DecryptionConfiguration</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.ORCFile.html">pyarrow.orc.ORCFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.ORCWriter.html">pyarrow.orc.ORCWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.read_table.html">pyarrow.orc.read_table</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.orc.write_table.html">pyarrow.orc.write_table</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/filesystems.html">Filesystems</a><input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-14"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileInfo.html">pyarrow.fs.FileInfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSelector.html">pyarrow.fs.FileSelector</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSystem.html">pyarrow.fs.FileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.LocalFileSystem.html">pyarrow.fs.LocalFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.S3FileSystem.html">pyarrow.fs.S3FileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.GcsFileSystem.html">pyarrow.fs.GcsFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.HadoopFileSystem.html">pyarrow.fs.HadoopFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.SubTreeFileSystem.html">pyarrow.fs.SubTreeFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.PyFileSystem.html">pyarrow.fs.PyFileSystem</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FileSystemHandler.html">pyarrow.fs.FileSystemHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.FSSpecHandler.html">pyarrow.fs.FSSpecHandler</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.copy_files.html">pyarrow.fs.copy_files</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.initialize_s3.html">pyarrow.fs.initialize_s3</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.finalize_s3.html">pyarrow.fs.finalize_s3</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.resolve_s3_region.html">pyarrow.fs.resolve_s3_region</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.fs.S3LogLevel.html">pyarrow.fs.S3LogLevel</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/dataset.html">Dataset</a><input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-15"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.dataset.html">pyarrow.dataset.dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.parquet_dataset.html">pyarrow.dataset.parquet_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.partitioning.html">pyarrow.dataset.partitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.field.html">pyarrow.dataset.field</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.scalar.html">pyarrow.dataset.scalar</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.write_dataset.html">pyarrow.dataset.write_dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileFormat.html">pyarrow.dataset.FileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.CsvFileFormat.html">pyarrow.dataset.CsvFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.CsvFragmentScanOptions.html">pyarrow.dataset.CsvFragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.IpcFileFormat.html">pyarrow.dataset.IpcFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.JsonFileFormat.html">pyarrow.dataset.JsonFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFileFormat.html">pyarrow.dataset.ParquetFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetReadOptions.html">pyarrow.dataset.ParquetReadOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFragmentScanOptions.html">pyarrow.dataset.ParquetFragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.ParquetFileFragment.html">pyarrow.dataset.ParquetFileFragment</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.OrcFileFormat.html">pyarrow.dataset.OrcFileFormat</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Partitioning.html">pyarrow.dataset.Partitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.PartitioningFactory.html">pyarrow.dataset.PartitioningFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.DirectoryPartitioning.html">pyarrow.dataset.DirectoryPartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.HivePartitioning.html">pyarrow.dataset.HivePartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FilenamePartitioning.html">pyarrow.dataset.FilenamePartitioning</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Dataset.html">pyarrow.dataset.Dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemDataset.html">pyarrow.dataset.FileSystemDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemFactoryOptions.html">pyarrow.dataset.FileSystemFactoryOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FileSystemDatasetFactory.html">pyarrow.dataset.FileSystemDatasetFactory</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.UnionDataset.html">pyarrow.dataset.UnionDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Fragment.html">pyarrow.dataset.Fragment</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.FragmentScanOptions.html">pyarrow.dataset.FragmentScanOptions</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.TaggedRecordBatch.html">pyarrow.dataset.TaggedRecordBatch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Scanner.html">pyarrow.dataset.Scanner</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.Expression.html">pyarrow.dataset.Expression</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.InMemoryDataset.html">pyarrow.dataset.InMemoryDataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.WrittenFile.html">pyarrow.dataset.WrittenFile</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.dataset.get_partition_keys.html">pyarrow.dataset.get_partition_keys</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/cuda.html">CUDA Integration</a><input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-16"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.Context.html">pyarrow.cuda.Context</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.CudaBuffer.html">pyarrow.cuda.CudaBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.new_host_buffer.html">pyarrow.cuda.new_host_buffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.HostBuffer.html">pyarrow.cuda.HostBuffer</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.BufferReader.html">pyarrow.cuda.BufferReader</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.BufferWriter.html">pyarrow.cuda.BufferWriter</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.serialize_record_batch.html">pyarrow.cuda.serialize_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.read_record_batch.html">pyarrow.cuda.read_record_batch</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.read_message.html">pyarrow.cuda.read_message</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cuda.IpcMemHandle.html">pyarrow.cuda.IpcMemHandle</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="api/misc.html">Miscellaneous</a><input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-17"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.cpu_count.html">pyarrow.cpu_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_cpu_count.html">pyarrow.set_cpu_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.io_thread_count.html">pyarrow.io_thread_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.set_io_thread_count.html">pyarrow.set_io_thread_count</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_include.html">pyarrow.get_include</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_libraries.html">pyarrow.get_libraries</a></li>
<li class="toctree-l3"><a class="reference internal" href="generated/pyarrow.get_library_dirs.html">pyarrow.get_library_dirs</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="getting_involved.html">Getting Involved</a></li>
<li class="toctree-l1"><a class="reference internal" href="benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/py/">Python cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Python</a></li>
<li class="breadcrumb-item active" aria-current="page">Reading and...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="reading-and-writing-the-apache-parquet-format">
<span id="parquet"></span><h1>Reading and Writing the Apache Parquet Format<a class="headerlink" href="#reading-and-writing-the-apache-parquet-format" title="Permalink to this heading">#</a></h1>
<p>The <a class="reference external" href="https://parquet.apache.org/">Apache Parquet</a> project provides a
standardized open-source columnar storage format for use in data analysis
systems. It was originally created for use in <a class="reference external" href="https://hadoop.apache.org/">Apache Hadoop</a>, with systems like <a class="reference external" href="https://drill.apache.org">Apache Drill</a>, <a class="reference external" href="https://hive.apache.org">Apache Hive</a>, <a class="reference external" href="https://impala.apache.org">Apache
Impala</a>, and <a class="reference external" href="https://spark.apache.org">Apache Spark</a> adopting it as a shared standard for
high-performance data I/O.</p>
<p>Apache Arrow is an ideal in-memory transport layer for data that is being read
or written with Parquet files. We have been concurrently developing the <a class="reference external" href="https://github.com/apache/arrow/tree/main/cpp/tools/parquet">C++
implementation of
Apache Parquet</a>,
which includes a native, multithreaded C++ adapter to and from in-memory Arrow
data. PyArrow includes Python bindings to this code, which in turn enables reading
and writing Parquet files with pandas as well.</p>
<section id="obtaining-pyarrow-with-parquet-support">
<h2>Obtaining pyarrow with Parquet Support<a class="headerlink" href="#obtaining-pyarrow-with-parquet-support" title="Permalink to this heading">#</a></h2>
<p>If you installed <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code> with pip or conda, it should be built with Parquet
support bundled:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [1]: </span><span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">pq</span>
</pre></div>
</div>
<p>If you are building <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code> from source, you must use <code class="docutils literal notranslate"><span class="pre">-DARROW_PARQUET=ON</span></code>
when compiling the C++ libraries and enable the Parquet extensions when
building <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>. If you want to use Parquet Encryption, then you must also
use <code class="docutils literal notranslate"><span class="pre">-DPARQUET_REQUIRE_ENCRYPTION=ON</span></code> when compiling the C++ libraries.
See the <a class="reference internal" href="../developers/python.html#python-development"><span class="std std-ref">Python Development</span></a> page for more details.</p>
</section>
<section id="reading-and-writing-single-files">
<h2>Reading and Writing Single Files<a class="headerlink" href="#reading-and-writing-single-files" title="Permalink to this heading">#</a></h2>
<p>The functions <a class="reference internal" href="generated/pyarrow.parquet.read_table.html#pyarrow.parquet.read_table" title="pyarrow.parquet.read_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_table()</span></code></a> and <a class="reference internal" href="generated/pyarrow.parquet.write_table.html#pyarrow.parquet.write_table" title="pyarrow.parquet.write_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_table()</span></code></a>
read and write the <a class="reference internal" href="data.html#data-table"><span class="std std-ref">pyarrow.Table</span></a> object, respectively.</p>
<p>Let’s look at a simple table:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [2]: </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="gp">In [3]: </span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="gp">In [4]: </span><span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="nn">pa</span>
<span class="gp">In [5]: </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;one&#39;</span><span class="p">:</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">],</span>
<span class="gp"> ...: </span> <span class="s1">&#39;two&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">],</span>
<span class="gp"> ...: </span> <span class="s1">&#39;three&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]},</span>
<span class="gp"> ...: </span> <span class="n">index</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;abc&#39;</span><span class="p">))</span>
<span class="gp"> ...: </span>
<span class="gp">In [6]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
</pre></div>
</div>
<p>We write this to Parquet format with <code class="docutils literal notranslate"><span class="pre">write_table</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [7]: </span><span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">pq</span>
<span class="gp">In [8]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s1">&#39;example.parquet&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>This creates a single Parquet file. In practice, a Parquet dataset may consist
of many files in many directories. We can read a single file back with
<code class="docutils literal notranslate"><span class="pre">read_table</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [9]: </span><span class="n">table2</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [10]: </span><span class="n">table2</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[10]: </span>
<span class="go"> one two three</span>
<span class="go">a -1.0 foo True</span>
<span class="go">b NaN bar False</span>
<span class="go">c 2.5 baz True</span>
</pre></div>
</div>
<p>You can pass a subset of columns to read, which can be much faster than reading
the whole file (due to the columnar layout):</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [11]: </span><span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;three&#39;</span><span class="p">])</span>
<span class="gh">Out[11]: </span>
<span class="go">pyarrow.Table</span>
<span class="go">one: double</span>
<span class="go">three: bool</span>
<span class="gt">----</span>
<span class="ne">one</span>: [[-1,null,2.5]]
<span class="ne">three</span>: [[true,false,true]]
</pre></div>
</div>
<p>When reading a subset of columns from a file that used a Pandas dataframe as the
source, we use <code class="docutils literal notranslate"><span class="pre">read_pandas</span></code> to maintain any additional index column data:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [12]: </span><span class="n">pq</span><span class="o">.</span><span class="n">read_pandas</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;two&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[12]: </span>
<span class="go"> two</span>
<span class="go">a foo</span>
<span class="go">b bar</span>
<span class="go">c baz</span>
</pre></div>
</div>
<p>We do not need to use a string to specify the origin of the file. It can be any of:</p>
<ul class="simple">
<li><p>A file path as a string</p></li>
<li><p>A <a class="reference internal" href="memory.html#io-native-file"><span class="std std-ref">NativeFile</span></a> from PyArrow</p></li>
<li><p>A Python file object</p></li>
</ul>
<p>In general, a Python file object will have the worst read performance, while a
string file path or an instance of <a class="reference internal" href="generated/pyarrow.NativeFile.html#pyarrow.NativeFile" title="pyarrow.NativeFile"><code class="xref py py-class docutils literal notranslate"><span class="pre">NativeFile</span></code></a> (especially memory
maps) will perform the best.</p>
<section id="reading-parquet-and-memory-mapping">
<span id="parquet-mmap"></span><h3>Reading Parquet and Memory Mapping<a class="headerlink" href="#reading-parquet-and-memory-mapping" title="Permalink to this heading">#</a></h3>
<p>Because Parquet data needs to be decoded from the Parquet format
and compression, it can’t be directly mapped from disk.
Thus the <code class="docutils literal notranslate"><span class="pre">memory_map</span></code> option might perform better on some systems
but won’t help much with resident memory consumption.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pq_array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">parquet</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s2">&quot;area1.parquet&quot;</span><span class="p">,</span> <span class="n">memory_map</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="s2">&quot;RSS: </span><span class="si">{}</span><span class="s2">MB&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">total_allocated_bytes</span><span class="p">()</span> <span class="o">&gt;&gt;</span> <span class="mi">20</span><span class="p">))</span>
<span class="go">RSS: 4299MB</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pq_array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">parquet</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s2">&quot;area1.parquet&quot;</span><span class="p">,</span> <span class="n">memory_map</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="s2">&quot;RSS: </span><span class="si">{}</span><span class="s2">MB&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">total_allocated_bytes</span><span class="p">()</span> <span class="o">&gt;&gt;</span> <span class="mi">20</span><span class="p">))</span>
<span class="go">RSS: 4299MB</span>
</pre></div>
</div>
<p>If you need to deal with Parquet data bigger than memory,
the <a class="reference internal" href="dataset.html#dataset"><span class="std std-ref">Tabular Datasets</span></a> and partitioning are probably what you are looking for.</p>
</section>
<section id="parquet-file-writing-options">
<h3>Parquet file writing options<a class="headerlink" href="#parquet-file-writing-options" title="Permalink to this heading">#</a></h3>
<p><a class="reference internal" href="generated/pyarrow.parquet.write_table.html#pyarrow.parquet.write_table" title="pyarrow.parquet.write_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_table()</span></code></a> has a number of options to
control various settings when writing a Parquet file.</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">version</span></code>, the Parquet format version to use. <code class="docutils literal notranslate"><span class="pre">'1.0'</span></code> ensures
compatibility with older readers, while <code class="docutils literal notranslate"><span class="pre">'2.4'</span></code> and greater values
enable more Parquet types and encodings.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">data_page_size</span></code>, to control the approximate size of encoded data
pages within a column chunk. This currently defaults to 1MB.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">flavor</span></code>, to set compatibility options particular to a Parquet
consumer like <code class="docutils literal notranslate"><span class="pre">'spark'</span></code> for Apache Spark.</p></li>
</ul>
<p>See the <a class="reference internal" href="generated/pyarrow.parquet.write_table.html#pyarrow.parquet.write_table" title="pyarrow.parquet.write_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_table()</span></code></a> docstring for more details.</p>
<p>There are some additional data type handling-specific options
described below.</p>
</section>
<section id="omitting-the-dataframe-index">
<h3>Omitting the DataFrame index<a class="headerlink" href="#omitting-the-dataframe-index" title="Permalink to this heading">#</a></h3>
<p>When using <code class="docutils literal notranslate"><span class="pre">pa.Table.from_pandas</span></code> to convert to an Arrow table, by default
one or more special columns are added to keep track of the index (row
labels). Storing the index takes extra space, so if your index is not valuable,
you may choose to omit it by passing <code class="docutils literal notranslate"><span class="pre">preserve_index=False</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [13]: </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;one&#39;</span><span class="p">:</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">],</span>
<span class="gp"> ....: </span> <span class="s1">&#39;two&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">],</span>
<span class="gp"> ....: </span> <span class="s1">&#39;three&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]},</span>
<span class="gp"> ....: </span> <span class="n">index</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;abc&#39;</span><span class="p">))</span>
<span class="gp"> ....: </span>
<span class="gp">In [14]: </span><span class="n">df</span>
<span class="gh">Out[14]: </span>
<span class="go"> one two three</span>
<span class="go">a -1.0 foo True</span>
<span class="go">b NaN bar False</span>
<span class="go">c 2.5 baz True</span>
<span class="gp">In [15]: </span><span class="n">table</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">preserve_index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</pre></div>
</div>
<p>Then we have:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [16]: </span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="s1">&#39;example_noindex.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [17]: </span><span class="n">t</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s1">&#39;example_noindex.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [18]: </span><span class="n">t</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
<span class="gh">Out[18]: </span>
<span class="go"> one two three</span>
<span class="go">0 -1.0 foo True</span>
<span class="go">1 NaN bar False</span>
<span class="go">2 2.5 baz True</span>
</pre></div>
</div>
<p>Here you see the index did not survive the round trip.</p>
</section>
</section>
<section id="finer-grained-reading-and-writing">
<h2>Finer-grained Reading and Writing<a class="headerlink" href="#finer-grained-reading-and-writing" title="Permalink to this heading">#</a></h2>
<p><code class="docutils literal notranslate"><span class="pre">read_table</span></code> uses the <a class="reference internal" href="generated/pyarrow.parquet.ParquetFile.html#pyarrow.parquet.ParquetFile" title="pyarrow.parquet.ParquetFile"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetFile</span></code></a> class, which has other features:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [19]: </span><span class="n">parquet_file</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetFile</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [20]: </span><span class="n">parquet_file</span><span class="o">.</span><span class="n">metadata</span>
<span class="gh">Out[20]: </span>
<span class="go">&lt;pyarrow._parquet.FileMetaData object at 0x7fa1f1e841d0&gt;</span>
<span class="go"> created_by: parquet-cpp-arrow version 17.0.0-SNAPSHOT</span>
<span class="go"> num_columns: 4</span>
<span class="go"> num_rows: 3</span>
<span class="go"> num_row_groups: 1</span>
<span class="go"> format_version: 2.6</span>
<span class="go"> serialized_size: 2603</span>
<span class="gp">In [21]: </span><span class="n">parquet_file</span><span class="o">.</span><span class="n">schema</span>
<span class="gh">Out[21]: </span>
<span class="go">&lt;pyarrow._parquet.ParquetSchema object at 0x7fa1f1d99c80&gt;</span>
<span class="go">required group field_id=-1 schema {</span>
<span class="go"> optional double field_id=-1 one;</span>
<span class="go"> optional binary field_id=-1 two (String);</span>
<span class="go"> optional boolean field_id=-1 three;</span>
<span class="go"> optional binary field_id=-1 __index_level_0__ (String);</span>
<span class="go">}</span>
</pre></div>
</div>
<p>As you can learn in more detail from the <a class="reference external" href="https://github.com/apache/parquet-format">Apache Parquet format</a> documentation, a Parquet file consists of
multiple row groups. <code class="docutils literal notranslate"><span class="pre">read_table</span></code> will read all of the row groups and
concatenate them into a single table. You can read individual row groups with
<code class="docutils literal notranslate"><span class="pre">read_row_group</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [22]: </span><span class="n">parquet_file</span><span class="o">.</span><span class="n">num_row_groups</span>
<span class="gh">Out[22]: </span><span class="go">1</span>
<span class="gp">In [23]: </span><span class="n">parquet_file</span><span class="o">.</span><span class="n">read_row_group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="gh">Out[23]: </span>
<span class="go">pyarrow.Table</span>
<span class="go">one: double</span>
<span class="go">two: string</span>
<span class="go">three: bool</span>
<span class="go">__index_level_0__: string</span>
<span class="gt">----</span>
<span class="ne">one</span>: [[-1,null,2.5]]
<span class="ne">two</span>: [[&quot;foo&quot;,&quot;bar&quot;,&quot;baz&quot;]]
<span class="ne">three</span>: [[true,false,true]]
<span class="ne">__index_level_0__</span>: [[&quot;a&quot;,&quot;b&quot;,&quot;c&quot;]]
</pre></div>
</div>
<p>We can similarly write a Parquet file with multiple row groups by using
<code class="docutils literal notranslate"><span class="pre">ParquetWriter</span></code>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [24]: </span><span class="k">with</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetWriter</span><span class="p">(</span><span class="s1">&#39;example2.parquet&#39;</span><span class="p">,</span> <span class="n">table</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="gp"> ....: </span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [25]: </span><span class="n">pf2</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetFile</span><span class="p">(</span><span class="s1">&#39;example2.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [26]: </span><span class="n">pf2</span><span class="o">.</span><span class="n">num_row_groups</span>
<span class="gh">Out[26]: </span><span class="go">3</span>
</pre></div>
</div>
</section>
<section id="inspecting-the-parquet-file-metadata">
<h2>Inspecting the Parquet File Metadata<a class="headerlink" href="#inspecting-the-parquet-file-metadata" title="Permalink to this heading">#</a></h2>
<p>The <code class="docutils literal notranslate"><span class="pre">FileMetaData</span></code> of a Parquet file can be accessed through
<a class="reference internal" href="generated/pyarrow.parquet.ParquetFile.html#pyarrow.parquet.ParquetFile" title="pyarrow.parquet.ParquetFile"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetFile</span></code></a> as shown above:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [27]: </span><span class="n">parquet_file</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetFile</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [28]: </span><span class="n">metadata</span> <span class="o">=</span> <span class="n">parquet_file</span><span class="o">.</span><span class="n">metadata</span>
</pre></div>
</div>
<p>or can also be read directly using <a class="reference internal" href="generated/pyarrow.parquet.read_metadata.html#pyarrow.parquet.read_metadata" title="pyarrow.parquet.read_metadata"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_metadata()</span></code></a>:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [29]: </span><span class="n">metadata</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_metadata</span><span class="p">(</span><span class="s1">&#39;example.parquet&#39;</span><span class="p">)</span>
<span class="gp">In [30]: </span><span class="n">metadata</span>
<span class="gh">Out[30]: </span>
<span class="go">&lt;pyarrow._parquet.FileMetaData object at 0x7fa1f1db8090&gt;</span>
<span class="go"> created_by: parquet-cpp-arrow version 17.0.0-SNAPSHOT</span>
<span class="go"> num_columns: 4</span>
<span class="go"> num_rows: 3</span>
<span class="go"> num_row_groups: 1</span>
<span class="go"> format_version: 2.6</span>
<span class="go"> serialized_size: 2603</span>
</pre></div>
</div>
<p>The returned <code class="docutils literal notranslate"><span class="pre">FileMetaData</span></code> object allows you to inspect the
<a class="reference external" href="https://github.com/apache/parquet-format#metadata">Parquet file metadata</a>,
such as the row groups and column chunk metadata and statistics:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="gp">In [31]: </span><span class="n">metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="gh">Out[31]: </span>
<span class="go">&lt;pyarrow._parquet.RowGroupMetaData object at 0x7fa1f18e0090&gt;</span>
<span class="go"> num_columns: 4</span>
<span class="go"> num_rows: 3</span>
<span class="go"> total_byte_size: 282</span>
<span class="go"> sorting_columns: ()</span>
<span class="gp">In [32]: </span><span class="n">metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="gh">Out[32]: </span>
<span class="go">&lt;pyarrow._parquet.ColumnChunkMetaData object at 0x7fa204e41210&gt;</span>
<span class="go"> file_offset: 108</span>
<span class="go"> file_path: </span>
<span class="go"> physical_type: DOUBLE</span>
<span class="go"> num_values: 3</span>
<span class="go"> path_in_schema: one</span>
<span class="go"> is_stats_set: True</span>
<span class="go"> statistics:</span>
<span class="go"> &lt;pyarrow._parquet.Statistics object at 0x7fa1f1db9120&gt;</span>
<span class="go"> has_min_max: True</span>
<span class="go"> min: -1.0</span>
<span class="go"> max: 2.5</span>
<span class="go"> null_count: 1</span>
<span class="go"> distinct_count: None</span>
<span class="go"> num_values: 2</span>
<span class="go"> physical_type: DOUBLE</span>
<span class="go"> logical_type: None</span>
<span class="go"> converted_type (legacy): NONE</span>
<span class="go"> compression: SNAPPY</span>
<span class="go"> encodings: (&#39;PLAIN&#39;, &#39;RLE&#39;, &#39;RLE_DICTIONARY&#39;)</span>
<span class="go"> has_dictionary_page: True</span>
<span class="go"> dictionary_page_offset: 4</span>
<span class="go"> data_page_offset: 36</span>
<span class="go"> total_compressed_size: 104</span>
<span class="go"> total_uncompressed_size: 100</span>
</pre></div>
</div>
</section>
<section id="data-type-handling">
<h2>Data Type Handling<a class="headerlink" href="#data-type-handling" title="Permalink to this heading">#</a></h2>
<section id="reading-types-as-dictionaryarray">
<h3>Reading types as DictionaryArray<a class="headerlink" href="#reading-types-as-dictionaryarray" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">read_dictionary</span></code> option in <code class="docutils literal notranslate"><span class="pre">read_table</span></code> and <code class="docutils literal notranslate"><span class="pre">ParquetDataset</span></code> will
cause columns to be read as <code class="docutils literal notranslate"><span class="pre">DictionaryArray</span></code>, which will become
<code class="docutils literal notranslate"><span class="pre">pandas.Categorical</span></code> when converted to pandas. This option is only valid for
string and binary column types, and it can yield significantly lower memory use
and improved performance for columns with many repeated string values.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="n">read_dictionary</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;binary_c0&#39;</span><span class="p">,</span> <span class="s1">&#39;stringb_c2&#39;</span><span class="p">])</span>
</pre></div>
</div>
</section>
<section id="storing-timestamps">
<h3>Storing timestamps<a class="headerlink" href="#storing-timestamps" title="Permalink to this heading">#</a></h3>
<p>Some Parquet readers may only support timestamps stored in millisecond
(<code class="docutils literal notranslate"><span class="pre">'ms'</span></code>) or microsecond (<code class="docutils literal notranslate"><span class="pre">'us'</span></code>) resolution. Since pandas uses nanoseconds
to represent timestamps, this can occasionally be a nuisance. By default
(when writing version 1.0 Parquet files), the nanoseconds will be cast to
microseconds (<code class="docutils literal notranslate"><span class="pre">'us'</span></code>).</p>
<p>In addition, we provide the <code class="docutils literal notranslate"><span class="pre">coerce_timestamps</span></code> option to allow you to select
the desired resolution:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">coerce_timestamps</span><span class="o">=</span><span class="s1">&#39;ms&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>If a cast to a lower resolution value may result in a loss of data, by default
an exception will be raised. This can be suppressed by passing
<code class="docutils literal notranslate"><span class="pre">allow_truncated_timestamps=True</span></code>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">coerce_timestamps</span><span class="o">=</span><span class="s1">&#39;ms&#39;</span><span class="p">,</span>
<span class="n">allow_truncated_timestamps</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</pre></div>
</div>
<p>Timestamps with nanoseconds can be stored without casting when using the
more recent Parquet format version 2.6:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="s1">&#39;2.6&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>However, many Parquet readers do not yet support this newer format version.
When compatibility across different processing frameworks is required, it is
recommended to write version 1.0 files by passing
<code class="docutils literal notranslate"><span class="pre">version='1.0'</span></code>.</p>
<p>Older Parquet implementations use <code class="docutils literal notranslate"><span class="pre">INT96</span></code> based storage of
timestamps, but this is now deprecated. This includes some older
versions of Apache Impala and Apache Spark. To write timestamps in
this format, set the <code class="docutils literal notranslate"><span class="pre">use_deprecated_int96_timestamps</span></code> option to
<code class="docutils literal notranslate"><span class="pre">True</span></code> in <code class="docutils literal notranslate"><span class="pre">write_table</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">use_deprecated_int96_timestamps</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</pre></div>
</div>
</section>
</section>
<section id="compression-encoding-and-file-compatibility">
<h2>Compression, Encoding, and File Compatibility<a class="headerlink" href="#compression-encoding-and-file-compatibility" title="Permalink to this heading">#</a></h2>
<p>The most commonly used Parquet implementations use dictionary encoding when
writing files; if the dictionaries grow too large, then they “fall back” to
plain encoding. Whether dictionary encoding is used can be toggled using the
<code class="docutils literal notranslate"><span class="pre">use_dictionary</span></code> option:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">use_dictionary</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</pre></div>
</div>
<p>The data pages within a column in a row group can be compressed after the
encoding passes (dictionary, RLE encoding). In PyArrow we use Snappy
compression by default, but Brotli, Gzip, ZSTD, LZ4, and uncompressed are
also supported:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;snappy&#39;</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;gzip&#39;</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;brotli&#39;</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;zstd&#39;</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;lz4&#39;</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>Snappy generally results in better performance, while Gzip may yield smaller
files.</p>
<p>These settings can also be set on a per-column basis:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">compression</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;foo&#39;</span><span class="p">:</span> <span class="s1">&#39;snappy&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">:</span> <span class="s1">&#39;gzip&#39;</span><span class="p">},</span>
<span class="n">use_dictionary</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">])</span>
</pre></div>
</div>
</section>
<section id="partitioned-datasets-multiple-files">
<h2>Partitioned Datasets (Multiple Files)<a class="headerlink" href="#partitioned-datasets-multiple-files" title="Permalink to this heading">#</a></h2>
<p>Multiple Parquet files constitute a Parquet <em>dataset</em>. These may be presented in a
number of ways:</p>
<ul class="simple">
<li><p>A list of absolute Parquet file paths</p></li>
<li><p>A directory name containing nested directories defining a partitioned dataset</p></li>
</ul>
<p>A dataset partitioned by year and month may look like this on disk:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>dataset_name/
year=2007/
month=01/
0.parq
1.parq
...
month=02/
0.parq
1.parq
...
month=03/
...
year=2008/
month=01/
...
...
</pre></div>
</div>
</section>
<section id="writing-to-partitioned-datasets">
<h2>Writing to Partitioned Datasets<a class="headerlink" href="#writing-to-partitioned-datasets" title="Permalink to this heading">#</a></h2>
<p>You can write a partitioned dataset for any <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code> file system that is a
file-store (e.g. local, HDFS, S3). The default behaviour when no filesystem is
added is to use the local filesystem.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Local dataset write</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_to_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">root_path</span><span class="o">=</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span>
<span class="n">partition_cols</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;two&#39;</span><span class="p">])</span>
</pre></div>
</div>
<p>The root path in this case specifies the parent directory to which data will be
saved. The partition columns are the column names by which to partition the
dataset. Columns are partitioned in the order they are given. The partition
splits are determined by the unique values in the partition columns.</p>
<p>To use another filesystem you only need to add the filesystem parameter. The
individual table writes are wrapped using <code class="docutils literal notranslate"><span class="pre">with</span></code> statements, so the
<code class="docutils literal notranslate"><span class="pre">pq.write_to_dataset</span></code> call itself does not need to be.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Remote file-system example</span>
<span class="kn">from</span> <span class="nn">pyarrow.fs</span> <span class="kn">import</span> <span class="n">HadoopFileSystem</span>
<span class="n">fs</span> <span class="o">=</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">,</span> <span class="n">user</span><span class="o">=</span><span class="n">user</span><span class="p">,</span> <span class="n">kerb_ticket</span><span class="o">=</span><span class="n">ticket_cache_path</span><span class="p">)</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_to_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">root_path</span><span class="o">=</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">,</span>
<span class="n">partition_cols</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;two&#39;</span><span class="p">],</span> <span class="n">filesystem</span><span class="o">=</span><span class="n">fs</span><span class="p">)</span>
</pre></div>
</div>
<p>Compatibility Note: if using <code class="docutils literal notranslate"><span class="pre">pq.write_to_dataset</span></code> to create a table that
will then be used by Hive, the partition column values must be compatible with
the allowed character set of the Hive version you are running.</p>
<section id="writing-metadata-and-common-metadata-files">
<h3>Writing <code class="docutils literal notranslate"><span class="pre">_metadata</span></code> and <code class="docutils literal notranslate"><span class="pre">_common_metadata</span></code> files<a class="headerlink" href="#writing-metadata-and-common-metadata-files" title="Permalink to this heading">#</a></h3>
<p>Some processing frameworks such as Spark or Dask (optionally) use <code class="docutils literal notranslate"><span class="pre">_metadata</span></code>
and <code class="docutils literal notranslate"><span class="pre">_common_metadata</span></code> files with partitioned datasets.</p>
<p>Those files include information about the schema of the full dataset (for
<code class="docutils literal notranslate"><span class="pre">_common_metadata</span></code>) and potentially all row group metadata of all files in the
partitioned dataset as well (for <code class="docutils literal notranslate"><span class="pre">_metadata</span></code>). The actual files are
metadata-only Parquet files. Note this is not a Parquet standard, but a
convention set in practice by those frameworks.</p>
<p>Using those files can make creating a Parquet dataset more efficient,
since the stored schema and file paths of all row groups can be used,
instead of inferring the schema and crawling the directories for all Parquet
files (this is especially the case for filesystems where accessing files
is expensive).</p>
<p>The <a class="reference internal" href="generated/pyarrow.parquet.write_to_dataset.html#pyarrow.parquet.write_to_dataset" title="pyarrow.parquet.write_to_dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_to_dataset()</span></code></a> function does not automatically
write such metadata files, but you can use it to gather the metadata and
combine and write them manually:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Write a dataset and collect metadata information of all written files</span>
<span class="n">metadata_collector</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_to_dataset</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">root_path</span><span class="p">,</span> <span class="n">metadata_collector</span><span class="o">=</span><span class="n">metadata_collector</span><span class="p">)</span>
<span class="c1"># Write the ``_common_metadata`` parquet file without row groups statistics</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_metadata</span><span class="p">(</span><span class="n">table</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">root_path</span> <span class="o">/</span> <span class="s1">&#39;_common_metadata&#39;</span><span class="p">)</span>
<span class="c1"># Write the ``_metadata`` parquet file with row groups statistics of all files</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_metadata</span><span class="p">(</span>
<span class="n">table</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">root_path</span> <span class="o">/</span> <span class="s1">&#39;_metadata&#39;</span><span class="p">,</span>
<span class="n">metadata_collector</span><span class="o">=</span><span class="n">metadata_collector</span>
<span class="p">)</span>
</pre></div>
</div>
<p>When not using the <a class="reference internal" href="generated/pyarrow.parquet.write_to_dataset.html#pyarrow.parquet.write_to_dataset" title="pyarrow.parquet.write_to_dataset"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_to_dataset()</span></code></a> function, but
writing the individual files of the partitioned dataset using
<a class="reference internal" href="generated/pyarrow.parquet.write_table.html#pyarrow.parquet.write_table" title="pyarrow.parquet.write_table"><code class="xref py py-func docutils literal notranslate"><span class="pre">write_table()</span></code></a> or <a class="reference internal" href="generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter" title="pyarrow.parquet.ParquetWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetWriter</span></code></a>,
the <code class="docutils literal notranslate"><span class="pre">metadata_collector</span></code> keyword can also be used to collect the FileMetaData
of the written files. In this case, you need to make sure to set the file path
contained in the row group metadata yourself before combining the metadata, and
the schemas of all different files and collected FileMetaData objects should be
the same:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">metadata_collector</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">table1</span><span class="p">,</span> <span class="n">root_path</span> <span class="o">/</span> <span class="s2">&quot;year=2017/data1.parquet&quot;</span><span class="p">,</span>
<span class="n">metadata_collector</span><span class="o">=</span><span class="n">metadata_collector</span>
<span class="p">)</span>
<span class="c1"># set the file path relative to the root of the partitioned dataset</span>
<span class="n">metadata_collector</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set_file_path</span><span class="p">(</span><span class="s2">&quot;year=2017/data1.parquet&quot;</span><span class="p">)</span>
<span class="c1"># combine and write the metadata</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">metadata_collector</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">for</span> <span class="n">_meta</span> <span class="ow">in</span> <span class="n">metadata_collector</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
<span class="n">metadata</span><span class="o">.</span><span class="n">append_row_groups</span><span class="p">(</span><span class="n">_meta</span><span class="p">)</span>
<span class="n">metadata</span><span class="o">.</span><span class="n">write_metadata_file</span><span class="p">(</span><span class="n">root_path</span> <span class="o">/</span> <span class="s2">&quot;_metadata&quot;</span><span class="p">)</span>
<span class="c1"># or use pq.write_metadata to combine and write in a single step</span>
<span class="n">pq</span><span class="o">.</span><span class="n">write_metadata</span><span class="p">(</span>
<span class="n">table1</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">root_path</span> <span class="o">/</span> <span class="s2">&quot;_metadata&quot;</span><span class="p">,</span>
<span class="n">metadata_collector</span><span class="o">=</span><span class="n">metadata_collector</span>
<span class="p">)</span>
</pre></div>
</div>
</section>
</section>
<section id="reading-from-partitioned-datasets">
<h2>Reading from Partitioned Datasets<a class="headerlink" href="#reading-from-partitioned-datasets" title="Permalink to this heading">#</a></h2>
<p>The <a class="reference internal" href="generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset" title="pyarrow.parquet.ParquetDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetDataset</span></code></a> class accepts either a directory name or a list
of file paths, and can discover and infer some common partition structures,
such as those produced by Hive:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dataset</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetDataset</span><span class="p">(</span><span class="s1">&#39;dataset_name/&#39;</span><span class="p">)</span>
<span class="n">table</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
</pre></div>
</div>
<p>You can also use the convenience function <code class="docutils literal notranslate"><span class="pre">read_table</span></code> exposed by
<code class="docutils literal notranslate"><span class="pre">pyarrow.parquet</span></code> that avoids the need for an additional Dataset object
creation step.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">table</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>Note: the partition columns in the original table will have their types
converted to Arrow dictionary types (pandas categorical) on load. Ordering of
partition columns is not preserved through the save/load process. If reading
from a remote filesystem into a pandas dataframe you may need to run
<code class="docutils literal notranslate"><span class="pre">sort_index</span></code> to maintain row ordering (as long as the <code class="docutils literal notranslate"><span class="pre">preserve_index</span></code>
option was enabled on write).</p>
<p>Other features:</p>
<ul class="simple">
<li><p>Filtering on all columns (using row group statistics) instead of only on
the partition keys.</p></li>
<li><p>Fine-grained partitioning: support for a directory partitioning scheme
in addition to the Hive-like partitioning (e.g. “/2019/11/15/” instead of
“/year=2019/month=11/day=15/”), and the ability to specify a schema for
the partition keys.</p></li>
</ul>
<p>Note:</p>
<ul class="simple">
<li><p>The partition keys need to be explicitly included in the <code class="docutils literal notranslate"><span class="pre">columns</span></code>
keyword when you want to include them in the result while reading a
subset of the columns.</p></li>
</ul>
</section>
<section id="using-with-spark">
<h2>Using with Spark<a class="headerlink" href="#using-with-spark" title="Permalink to this heading">#</a></h2>
<p>Spark places some constraints on the types of Parquet files it will read. The
option <code class="docutils literal notranslate"><span class="pre">flavor='spark'</span></code> will set these options automatically and also
sanitize field characters unsupported by Spark SQL.</p>
</section>
<section id="multithreaded-reads">
<h2>Multithreaded Reads<a class="headerlink" href="#multithreaded-reads" title="Permalink to this heading">#</a></h2>
<p>Each of the reading functions by default uses multi-threading for reading
columns in parallel. Depending on the speed of IO
and how expensive it is to decode the columns in a particular file
(particularly with GZIP compression), this can yield significantly higher data
throughput.</p>
<p>This can be disabled by specifying <code class="docutils literal notranslate"><span class="pre">use_threads=False</span></code>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The number of threads to use concurrently is automatically inferred by Arrow
and can be inspected using the <a class="reference internal" href="generated/pyarrow.cpu_count.html#pyarrow.cpu_count" title="pyarrow.cpu_count"><code class="xref py py-func docutils literal notranslate"><span class="pre">cpu_count()</span></code></a> function.</p>
</div>
</section>
<section id="reading-from-cloud-storage">
<h2>Reading from cloud storage<a class="headerlink" href="#reading-from-cloud-storage" title="Permalink to this heading">#</a></h2>
<p>In addition to local files, pyarrow supports other filesystems, such as cloud
filesystems, through the <code class="docutils literal notranslate"><span class="pre">filesystem</span></code> keyword:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">pyarrow</span> <span class="kn">import</span> <span class="n">fs</span>
<span class="n">s3</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">S3FileSystem</span><span class="p">(</span><span class="n">region</span><span class="o">=</span><span class="s2">&quot;us-east-2&quot;</span><span class="p">)</span>
<span class="n">table</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s2">&quot;bucket/object/key/prefix&quot;</span><span class="p">,</span> <span class="n">filesystem</span><span class="o">=</span><span class="n">s3</span><span class="p">)</span>
</pre></div>
</div>
<p>Currently, <a class="reference internal" href="generated/pyarrow.fs.HadoopFileSystem.html#pyarrow.fs.HadoopFileSystem" title="pyarrow.fs.HadoopFileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">HDFS</span></code></a> and
<a class="reference internal" href="generated/pyarrow.fs.S3FileSystem.html#pyarrow.fs.S3FileSystem" title="pyarrow.fs.S3FileSystem"><code class="xref py py-class docutils literal notranslate"><span class="pre">Amazon</span> <span class="pre">S3-compatible</span> <span class="pre">storage</span></code></a> are
supported. See the <a class="reference internal" href="filesystems.html#filesystem"><span class="std std-ref">Filesystem Interface</span></a> docs for more details. For those
built-in filesystems, the filesystem can also be inferred from the file path,
if specified as a URI:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">table</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s2">&quot;s3://bucket/object/key/prefix&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>Other filesystems can still be supported if there is an
<a class="reference external" href="https://filesystem-spec.readthedocs.io/en/latest/">fsspec</a>-compatible
implementation available. See <a class="reference internal" href="filesystems.html#filesystem-fsspec"><span class="std std-ref">Using fsspec-compatible filesystems with Arrow</span></a> for more details.
One example is Azure Blob storage, which can be interfaced through the
<a class="reference external" href="https://github.com/dask/adlfs">adlfs</a> package.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">adlfs</span> <span class="kn">import</span> <span class="n">AzureBlobFileSystem</span>
<span class="n">abfs</span> <span class="o">=</span> <span class="n">AzureBlobFileSystem</span><span class="p">(</span><span class="n">account_name</span><span class="o">=</span><span class="s2">&quot;XXXX&quot;</span><span class="p">,</span> <span class="n">account_key</span><span class="o">=</span><span class="s2">&quot;XXXX&quot;</span><span class="p">,</span> <span class="n">container_name</span><span class="o">=</span><span class="s2">&quot;XXXX&quot;</span><span class="p">)</span>
<span class="n">table</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span><span class="s2">&quot;file.parquet&quot;</span><span class="p">,</span> <span class="n">filesystem</span><span class="o">=</span><span class="n">abfs</span><span class="p">)</span>
</pre></div>
</div>
</section>
<section id="parquet-modular-encryption-columnar-encryption">
<h2>Parquet Modular Encryption (Columnar Encryption)<a class="headerlink" href="#parquet-modular-encryption-columnar-encryption" title="Permalink to this heading">#</a></h2>
<p>Columnar encryption is supported for Parquet files in C++ starting from
Apache Arrow 4.0.0 and in PyArrow starting from Apache Arrow 6.0.0.</p>
<p>Parquet uses the envelope encryption practice, where file parts are encrypted
with “data encryption keys” (DEKs), and the DEKs are encrypted with “master
encryption keys” (MEKs). The DEKs are randomly generated by Parquet for each
encrypted file/column. The MEKs are generated, stored and managed in a Key
Management Service (KMS) of the user’s choice.</p>
<p>Reading and writing encrypted Parquet files involves passing file encryption
and decryption properties to <a class="reference internal" href="generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter" title="pyarrow.parquet.ParquetWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetWriter</span></code></a> and to
<a class="reference internal" href="generated/pyarrow.parquet.ParquetFile.html#pyarrow.parquet.ParquetFile" title="pyarrow.parquet.ParquetFile"><code class="xref py py-class docutils literal notranslate"><span class="pre">ParquetFile</span></code></a>, respectively.</p>
<p>Writing an encrypted Parquet file:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">encryption_properties</span> <span class="o">=</span> <span class="n">crypto_factory</span><span class="o">.</span><span class="n">file_encryption_properties</span><span class="p">(</span>
<span class="n">kms_connection_config</span><span class="p">,</span> <span class="n">encryption_config</span><span class="p">)</span>
<span class="k">with</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetWriter</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span>
<span class="n">encryption_properties</span><span class="o">=</span><span class="n">encryption_properties</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span><span class="n">table</span><span class="p">)</span>
</pre></div>
</div>
<p>Reading an encrypted Parquet file:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">decryption_properties</span> <span class="o">=</span> <span class="n">crypto_factory</span><span class="o">.</span><span class="n">file_decryption_properties</span><span class="p">(</span>
<span class="n">kms_connection_config</span><span class="p">)</span>
<span class="n">parquet_file</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">ParquetFile</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span>
<span class="n">decryption_properties</span><span class="o">=</span><span class="n">decryption_properties</span><span class="p">)</span>
</pre></div>
</div>
<p>In order to create the encryption and decryption properties, a
<a class="reference internal" href="generated/pyarrow.parquet.encryption.CryptoFactory.html#pyarrow.parquet.encryption.CryptoFactory" title="pyarrow.parquet.encryption.CryptoFactory"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.CryptoFactory</span></code></a> should be created and
initialized with KMS Client details, as described below.</p>
<section id="kms-client">
<h3>KMS Client<a class="headerlink" href="#kms-client" title="Permalink to this heading">#</a></h3>
<p>The master encryption keys should be kept and managed in a production-grade
Key Management System (KMS), deployed in the user’s organization. Using Parquet
encryption requires implementation of a client class for the KMS server.
Any KmsClient implementation should implement the informal interface
defined by <a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsClient.html#pyarrow.parquet.encryption.KmsClient" title="pyarrow.parquet.encryption.KmsClient"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.KmsClient</span></code></a> as follows:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pyarrow.parquet.encryption</span> <span class="k">as</span> <span class="nn">pe</span>
<span class="k">class</span> <span class="nc">MyKmsClient</span><span class="p">(</span><span class="n">pe</span><span class="o">.</span><span class="n">KmsClient</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;An example KmsClient implementation skeleton&quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">kms_connection_configuration</span><span class="p">):</span>
<span class="n">pe</span><span class="o">.</span><span class="n">KmsClient</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="c1"># Any KMS-specific initialization based on</span>
<span class="c1"># kms_connection_configuration comes here</span>
<span class="k">def</span> <span class="nf">wrap_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_bytes</span><span class="p">,</span> <span class="n">master_key_identifier</span><span class="p">):</span>
<span class="n">wrapped_key</span> <span class="o">=</span> <span class="o">...</span> <span class="c1"># call KMS to wrap key_bytes with key specified by</span>
<span class="c1"># master_key_identifier</span>
<span class="k">return</span> <span class="n">wrapped_key</span>
<span class="k">def</span> <span class="nf">unwrap_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wrapped_key</span><span class="p">,</span> <span class="n">master_key_identifier</span><span class="p">):</span>
<span class="n">key_bytes</span> <span class="o">=</span> <span class="o">...</span> <span class="c1"># call KMS to unwrap wrapped_key with key specified by</span>
<span class="c1"># master_key_identifier</span>
<span class="k">return</span> <span class="n">key_bytes</span>
</pre></div>
</div>
<p>The concrete implementation will be loaded at runtime by a factory function
provided by the user. This factory function will be used to initialize the
<a class="reference internal" href="generated/pyarrow.parquet.encryption.CryptoFactory.html#pyarrow.parquet.encryption.CryptoFactory" title="pyarrow.parquet.encryption.CryptoFactory"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.CryptoFactory</span></code></a> for creating file encryption
and decryption properties.</p>
<p>For example, in order to use the <code class="docutils literal notranslate"><span class="pre">MyKmsClient</span></code> defined above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">kms_client_factory</span><span class="p">(</span><span class="n">kms_connection_configuration</span><span class="p">):</span>
<span class="k">return</span> <span class="n">MyKmsClient</span><span class="p">(</span><span class="n">kms_connection_configuration</span><span class="p">)</span>
<span class="n">crypto_factory</span> <span class="o">=</span> <span class="n">CryptoFactory</span><span class="p">(</span><span class="n">kms_client_factory</span><span class="p">)</span>
</pre></div>
</div>
<p>An <a class="reference download internal" download="" href="../_downloads/2713f3cdaa3fc0dc691cd51bac09c6d4/sample_vault_kms_client.py"><code class="xref download docutils literal notranslate"><span class="pre">example</span></code></a>
of such a class for an open-source
<a class="reference external" href="https://www.vaultproject.io/api/secret/transit">KMS</a> can be found in the Apache
Arrow GitHub repository. The production KMS client should be designed in
cooperation with an organization’s security administrators, and built by
developers with experience in access control management. Once such a class is
created, it can be passed to applications via a factory method and leveraged
by general PyArrow users as shown in the encrypted Parquet write/read sample
above.</p>
</section>
<section id="kms-connection-configuration">
<h3>KMS connection configuration<a class="headerlink" href="#kms-connection-configuration" title="Permalink to this heading">#</a></h3>
<p>Configuration of the connection to the KMS (<a class="reference internal" href="generated/pyarrow.parquet.encryption.KmsConnectionConfig.html#pyarrow.parquet.encryption.KmsConnectionConfig" title="pyarrow.parquet.encryption.KmsConnectionConfig"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.KmsConnectionConfig</span></code></a>
used when creating file encryption and decryption properties) includes the
following options:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">kms_instance_url</span></code>, URL of the KMS instance.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">kms_instance_id</span></code>, ID of the KMS instance that will be used for encryption
(if multiple KMS instances are available).</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">key_access_token</span></code>, authorization token that will be passed to KMS.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">custom_kms_conf</span></code>, a string dictionary with KMS-type-specific configuration.</p></li>
</ul>
</section>
<section id="encryption-configuration">
<h3>Encryption configuration<a class="headerlink" href="#encryption-configuration" title="Permalink to this heading">#</a></h3>
<p><a class="reference internal" href="generated/pyarrow.parquet.encryption.EncryptionConfiguration.html#pyarrow.parquet.encryption.EncryptionConfiguration" title="pyarrow.parquet.encryption.EncryptionConfiguration"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.EncryptionConfiguration</span></code></a> (used when
creating file encryption properties) includes the following options:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">footer_key</span></code>, the ID of the master key for footer encryption/signing.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">column_keys</span></code>, which columns to encrypt with which key. Dictionary with
master key IDs as the keys, and column name lists as the values,
e.g. <code class="docutils literal notranslate"><span class="pre">{key1:</span> <span class="pre">[col1,</span> <span class="pre">col2],</span> <span class="pre">key2:</span> <span class="pre">[col3]}</span></code>.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">encryption_algorithm</span></code>, the Parquet encryption algorithm.
Can be <code class="docutils literal notranslate"><span class="pre">AES_GCM_V1</span></code> (default) or <code class="docutils literal notranslate"><span class="pre">AES_GCM_CTR_V1</span></code>.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">plaintext_footer</span></code>, whether to write the file footer in plain text (otherwise it is encrypted).</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">double_wrapping</span></code>, whether to use double wrapping - where data encryption keys (DEKs)
are encrypted with key encryption keys (KEKs), which in turn are encrypted
with master encryption keys (MEKs). If set to <code class="docutils literal notranslate"><span class="pre">False</span></code>, single wrapping is
used - where DEKs are encrypted directly with MEKs.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">cache_lifetime</span></code>, the lifetime of cached entities (key encryption keys,
local wrapping keys, KMS client objects) represented as a <code class="docutils literal notranslate"><span class="pre">datetime.timedelta</span></code>.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">internal_key_material</span></code>, whether to store key material inside Parquet file footers;
this mode doesn’t produce additional files. If set to <code class="docutils literal notranslate"><span class="pre">False</span></code>, key material is
stored in separate files in the same folder, which enables key rotation for
immutable Parquet files.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">data_key_length_bits</span></code>, the length of data encryption keys (DEKs), randomly
generated by Parquet key management tools. Can be 128, 192 or 256 bits.</p></li>
</ul>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>When <code class="docutils literal notranslate"><span class="pre">double_wrapping</span></code> is true, Parquet implements a “double envelope
encryption” mode that minimizes the interaction of the program with a KMS
server. In this mode, the DEKs are encrypted with “key encryption keys”
(KEKs, randomly generated by Parquet). The KEKs are encrypted with “master
encryption keys” (MEKs) in the KMS; the result and the KEK itself are
cached in the process memory.</p>
</div>
<p>An example encryption configuration:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">encryption_config</span> <span class="o">=</span> <span class="n">pq</span><span class="o">.</span><span class="n">EncryptionConfiguration</span><span class="p">(</span>
<span class="n">footer_key</span><span class="o">=</span><span class="s2">&quot;footer_key_name&quot;</span><span class="p">,</span>
<span class="n">column_keys</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;column_key_name&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Column1&quot;</span><span class="p">,</span> <span class="s2">&quot;Column2&quot;</span><span class="p">],</span>
<span class="p">},</span>
<span class="p">)</span>
</pre></div>
</div>
</section>
<section id="decryption-configuration">
<h3>Decryption configuration<a class="headerlink" href="#decryption-configuration" title="Permalink to this heading">#</a></h3>
<p><a class="reference internal" href="generated/pyarrow.parquet.encryption.DecryptionConfiguration.html#pyarrow.parquet.encryption.DecryptionConfiguration" title="pyarrow.parquet.encryption.DecryptionConfiguration"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.parquet.encryption.DecryptionConfiguration</span></code></a> (used when creating
file decryption properties) is optional and it includes the following options:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">cache_lifetime</span></code>, the lifetime of cached entities (key encryption keys, local
wrapping keys, KMS client objects) represented as a <code class="docutils literal notranslate"><span class="pre">datetime.timedelta</span></code>.</p></li>
</ul>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="json.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Reading JSON files</p>
</div>
</a>
<a class="right-next"
href="dataset.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Tabular Datasets</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#obtaining-pyarrow-with-parquet-support">Obtaining pyarrow with Parquet Support</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-and-writing-single-files">Reading and Writing Single Files</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-parquet-and-memory-mapping">Reading Parquet and Memory Mapping</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#parquet-file-writing-options">Parquet file writing options</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#omitting-the-dataframe-index">Omitting the DataFrame index</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#finer-grained-reading-and-writing">Finer-grained Reading and Writing</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#inspecting-the-parquet-file-metadata">Inspecting the Parquet File Metadata</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-type-handling">Data Type Handling</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-types-as-dictionaryarray">Reading types as DictionaryArray</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#storing-timestamps">Storing timestamps</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#compression-encoding-and-file-compatibility">Compression, Encoding, and File Compatibility</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#partitioned-datasets-multiple-files">Partitioned Datasets (Multiple Files)</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-to-partitioned-datasets">Writing to Partitioned Datasets</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-metadata-and-common-metadata-files">Writing <code class="docutils literal notranslate"><span class="pre">_metadata</span></code> and <code class="docutils literal notranslate"><span class="pre">_common_metadata</span></code> files</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-from-partitioned-datasets">Reading from Partitioned Datasets</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#using-with-spark">Using with Spark</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multithreaded-reads">Multithreaded Reads</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-from-cloud-storage">Reading from cloud storage</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parquet-modular-encryption-columnar-encryption">Parquet Modular Encryption (Columnar Encryption)</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kms-client">KMS Client</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kms-connection-configuration">KMS connection configuration</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#encryption-configuration">Encryption configuration</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decryption-configuration">Decryption configuration</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/python/parquet.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>