blob: 8a7bbd7d9c4920dfd72c17bd68f34b3c50033836 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Python Development &#8212; Apache Arrow v17.0.0.dev59</title>
<script data-cfasync="false">
  // Copy the saved "mode" and "theme" preferences from localStorage onto the
  // root element's data attributes. When nothing is stored (or the stored
  // value is empty) the mode falls back to "" and the theme to "light".
  (function () {
    var rootData = document.documentElement.dataset;
    rootData.mode = localStorage.getItem("mode") || "";
    rootData.theme = localStorage.getItem("theme") || "light";
  })();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>
  // Record this page's name on the shared DOCUMENTATION_OPTIONS object
  // (presumably defined by documentation_options.js, loaded earlier in the head).
  DOCUMENTATION_OPTIONS["pagename"] = 'developers/python';
</script>
<script>
  // Theme-related settings: theme version, where the version switcher fetches
  // its JSON from, which entry matches this build, and whether to show the
  // version warning banner.
  Object.assign(DOCUMENTATION_OPTIONS, {
    theme_version: '0.15.2',
    theme_switcher_json_url: '/docs/_static/versions.json',
    theme_switcher_version_match: 'dev/',
    show_version_warning_banner: true
  });
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/developers/python.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Continuous Integration" href="continuous_integration/index.html" />
<link rel="prev" title="Development Guidelines" href="java/development.html" />
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
/* Matomo analytics bootstrap: queue tracker commands on window._paq, then load matomo.js asynchronously. */
/* Reuse an existing window._paq queue if one is already defined; otherwise start with an empty array. */
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
/* Base URL of the analytics server; both the tracking endpoint and the tracker script are resolved against it. */
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
/* Create a script element for matomo.js, mark it async, and insert it before the first script element in the document. */
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev59 - Home"/>
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev59 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Emit the header search button at this position in the document. Because the
// markup is written from script, the button is only present when JavaScript runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
// Emit the header version-switcher dropdown. The button label and the empty
// menu container are placeholders: per the notes inside the markup, the label
// may be changed and the menu is populated by JavaScript on page load.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Emit the header light/dark toggle button. It contains one icon span per
// mode (light, dark, auto), distinguished by their data-mode attribute.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Emit the search button for the mobile navbar container. Because the markup
// is written from script, the button is only present when JavaScript runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
// Emit the sidebar version-switcher dropdown. The button label and the empty
// menu container are placeholders: per the notes inside the markup, the label
// may be changed and the menu is populated by JavaScript on page load.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Emit the sidebar light/dark toggle button. It contains one icon span per
// mode (light, dark, auto), distinguished by their data-mode attribute.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="bug_reports.html">Bug reports and feature requests</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="guide/index.html">New Contributor’s Guide</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="guide/architectural_overview.html">Architectural Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="guide/communication.html">Communication</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="guide/step_by_step/index.html">Steps in making your first PR</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/set_up.html">Set up</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/building.html">Building the Arrow libraries 🏋🏿‍♀️</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/finding_issues.html">Finding good first issues 🔎</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/arrow_codebase.html">Working on the Arrow codebase 🧐</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/testing.html">Testing 🧪</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/styling.html">Styling 😎</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/step_by_step/pr_lifecycle.html">Lifecycle of a pull request</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="guide/documentation.html">Helping with documentation</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="guide/tutorials/index.html">Tutorials</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="guide/tutorials/python_tutorial.html">Python tutorial</a></li>
<li class="toctree-l3"><a class="reference internal" href="guide/tutorials/r_tutorial.html">R tutorials</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="guide/resources.html">Additional information and resources</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="overview.html">Contributing Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="reviewing.html">Reviewing contributions</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="cpp/index.html">C++ Development</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="cpp/building.html">Building Arrow C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="cpp/development.html">Development Guidelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="cpp/windows.html">Developing on Windows</a></li>
<li class="toctree-l2"><a class="reference internal" href="cpp/emscripten.html">Cross compiling for WebAssembly with Emscripten</a></li>
<li class="toctree-l2"><a class="reference internal" href="cpp/conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="cpp/fuzzing.html">Fuzzing Arrow C++</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="java/index.html">Java Development</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="java/building.html">Building Arrow Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="java/development.html">Development Guidelines</a></li>
</ul>
</li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Python Development</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="continuous_integration/index.html">Continuous Integration</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="continuous_integration/overview.html">Continuous Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="continuous_integration/docker.html">Running Docker Builds</a></li>
<li class="toctree-l2"><a class="reference internal" href="continuous_integration/archery.html">Daily Development using Archery</a></li>
<li class="toctree-l2"><a class="reference internal" href="continuous_integration/crossbow.html">Packaging and Testing with Crossbow</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="documentation.html">Building the Documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="release.html">Release Management Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="release_verification.html">Release Verification Process</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Development</a></li>
<li class="breadcrumb-item active" aria-current="page">Python Development</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="python-development">
<span id="id1"></span><h1>Python Development<a class="headerlink" href="#python-development" title="Permalink to this heading">#</a></h1>
<p>This page provides general Python development guidelines and source build
instructions for all platforms.</p>
<section id="coding-style">
<h2>Coding Style<a class="headerlink" href="#coding-style" title="Permalink to this heading">#</a></h2>
<p>We follow a PEP8-like coding style similar to that of the <a class="reference external" href="https://github.com/pandas-dev/pandas">pandas project</a>. To check style issues, use the
<a class="reference internal" href="continuous_integration/archery.html#archery"><span class="std std-ref">Archery</span></a> subcommand <code class="docutils literal notranslate"><span class="pre">lint</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span><span class="s2">&quot;arrow/dev/archery[lint]&quot;</span>
</pre></div>
</div>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>lint<span class="w"> </span>--python
</pre></div>
</div>
<p>Some of the issues can be automatically fixed by passing the <code class="docutils literal notranslate"><span class="pre">--fix</span></code> option:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>lint<span class="w"> </span>--python<span class="w"> </span>--fix
</pre></div>
</div>
<p>The Python code base also includes some C++ files. To fix formatting in those
files, add the <code class="docutils literal notranslate"><span class="pre">--clang-format</span></code> option:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>lint<span class="w"> </span>--python<span class="w"> </span>--clang-format<span class="w"> </span>--fix
</pre></div>
</div>
</section>
<section id="unit-testing">
<span id="python-unit-testing"></span><h2>Unit Testing<a class="headerlink" href="#unit-testing" title="Permalink to this heading">#</a></h2>
<p>We are using <a class="reference external" href="https://docs.pytest.org/en/latest/">pytest</a> to develop our unit
test suite. After building the project (see below) you can run its unit tests
like so:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/python
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>Package requirements to run the unit tests are found in
<code class="docutils literal notranslate"><span class="pre">requirements-test.txt</span></code> and can be installed if needed with <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">-r</span>
<span class="pre">requirements-test.txt</span></code>.</p>
<p>If you get import errors for <code class="docutils literal notranslate"><span class="pre">pyarrow._lib</span></code> or another PyArrow module when
trying to run the tests, run <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">-m</span> <span class="pre">pytest</span> <span class="pre">arrow/python/pyarrow</span></code> and check
if the editable version of pyarrow was installed correctly.</p>
<p>The project has a number of custom command line options for its test
suite. Some tests are disabled by default, for example. To see all the options,
run</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow<span class="w"> </span>--help
</pre></div>
</div>
<p>and look for the “custom options” section.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>There are a few low-level tests written directly in C++. These tests are
implemented in <a class="reference external" href="https://github.com/apache/arrow/blob/main/python/pyarrow/src/python_test.cc">pyarrow/src/python_test.cc</a>,
but they are also wrapped in a <code class="docutils literal notranslate"><span class="pre">pytest</span></code>-based
<a class="reference external" href="https://github.com/apache/arrow/blob/main/python/pyarrow/tests/test_cpp_internals.py">test module</a>
run automatically as part of the PyArrow test suite.</p>
</div>
<section id="test-groups">
<h3>Test Groups<a class="headerlink" href="#test-groups" title="Permalink to this heading">#</a></h3>
<p>We have many tests that are grouped together using pytest marks. Some of these
are disabled by default. To enable a test group, pass <code class="docutils literal notranslate"><span class="pre">--$GROUP_NAME</span></code>,
e.g. <code class="docutils literal notranslate"><span class="pre">--parquet</span></code>. To disable a test group, prepend <code class="docutils literal notranslate"><span class="pre">disable-</span></code>, so
<code class="docutils literal notranslate"><span class="pre">--disable-parquet</span></code> for example. To run <strong>only</strong> the unit tests for a
particular group, prepend <code class="docutils literal notranslate"><span class="pre">only-</span></code> instead, for example <code class="docutils literal notranslate"><span class="pre">--only-parquet</span></code>.</p>
<p>The test groups currently include:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">dataset</span></code>: Apache Arrow Dataset tests</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">flight</span></code>: Flight RPC tests</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">gandiva</span></code>: tests for Gandiva expression compiler (uses LLVM)</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">hdfs</span></code>: tests that use libhdfs to access the Hadoop filesystem</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">hypothesis</span></code>: tests that use the <code class="docutils literal notranslate"><span class="pre">hypothesis</span></code> module for generating
random test cases. Note that <code class="docutils literal notranslate"><span class="pre">--hypothesis</span></code> doesn’t work due to a quirk
with pytest, so you have to pass <code class="docutils literal notranslate"><span class="pre">--enable-hypothesis</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">large_memory</span></code>: Tests requiring a large amount of system RAM</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">orc</span></code>: Apache ORC tests</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">parquet</span></code>: Apache Parquet tests</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">s3</span></code>: Tests for Amazon S3</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">tensorflow</span></code>: Tests that involve TensorFlow</p></li>
</ul>
</section>
<section id="doctest">
<h3>Doctest<a class="headerlink" href="#doctest" title="Permalink to this heading">#</a></h3>
<p>We are using <a class="reference external" href="https://docs.python.org/3/library/doctest.html">doctest</a>
to check that docstring examples are up-to-date and correct. You can
also do that locally by running:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/python
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>--doctest-modules
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>--doctest-modules<span class="w"> </span>path/to/module.py<span class="w"> </span><span class="c1"># checking single file</span>
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>for <code class="docutils literal notranslate"><span class="pre">.py</span></code> files or</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/python
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>--doctest-cython
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>--doctest-cython<span class="w"> </span>path/to/module.pyx<span class="w"> </span><span class="c1"># checking single file</span>
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>for <code class="docutils literal notranslate"><span class="pre">.pyx</span></code> and <code class="docutils literal notranslate"><span class="pre">.pxi</span></code> files. In this case you will also need to
install the <a class="reference external" href="https://github.com/lgpage/pytest-cython">pytest-cython</a> plugin.</p>
</section>
<section id="benchmarking">
<h3>Benchmarking<a class="headerlink" href="#benchmarking" title="Permalink to this heading">#</a></h3>
<p>For running the benchmarks, see <a class="reference internal" href="../python/benchmarks.html#python-benchmarks"><span class="std std-ref">Benchmarks</span></a>.</p>
</section>
</section>
<section id="building-on-linux-and-macos">
<span id="build-pyarrow"></span><h2>Building on Linux and macOS<a class="headerlink" href="#building-on-linux-and-macos" title="Permalink to this heading">#</a></h2>
<section id="system-requirements">
<h3>System Requirements<a class="headerlink" href="#system-requirements" title="Permalink to this heading">#</a></h3>
<p>On macOS, any modern Xcode (6.4 or higher; the current version is 13) or
Xcode Command Line Tools (<code class="docutils literal notranslate"><span class="pre">xcode-select</span> <span class="pre">--install</span></code>) are sufficient.</p>
<p>On Linux, for this guide, we require a minimum of gcc 4.8 or clang 3.7.
You can check your version by running</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>gcc<span class="w"> </span>--version
</pre></div>
</div>
<p>If the system compiler is older than gcc 4.8, it can be set to a newer version
using the <code class="docutils literal notranslate"><span class="pre">$CC</span></code> and <code class="docutils literal notranslate"><span class="pre">$CXX</span></code> environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CC</span><span class="o">=</span>gcc-4.8
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CXX</span><span class="o">=</span>g++-4.8
</pre></div>
</div>
</section>
<section id="environment-setup-and-build">
<h3>Environment Setup and Build<a class="headerlink" href="#environment-setup-and-build" title="Permalink to this heading">#</a></h3>
<p>First, let’s clone the Arrow git repository:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/apache/arrow.git
</pre></div>
</div>
<p>Pull in the test data and set up the environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow
<span class="gp">$ </span>git<span class="w"> </span>submodule<span class="w"> </span>update<span class="w"> </span>--init
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PARQUET_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/cpp/submodules/parquet-testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<section id="using-conda">
<h4>Using Conda<a class="headerlink" href="#using-conda" title="Permalink to this heading">#</a></h4>
<p>The <a class="reference external" href="https://conda.io/">conda</a> package manager allows installing build-time
dependencies for Arrow C++ and PyArrow as pre-built binaries, which can make
Arrow development easier and faster.</p>
<p>Let’s create a conda environment with all the C++ build and Python dependencies
from conda-forge, targeting development for Python 3.10:</p>
<p>On Linux and macOS:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>create<span class="w"> </span>-y<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>-c<span class="w"> </span>conda-forge<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_unix.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_cpp.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_python.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_gandiva.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>compilers<span class="w"> </span><span class="se">\</span>
<span class="w"> </span><span class="nv">python</span><span class="o">=</span><span class="m">3</span>.10<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>pandas
</pre></div>
</div>
<p>As of January 2019, the <code class="docutils literal notranslate"><span class="pre">compilers</span></code> package is needed on many Linux
distributions to use packages from conda-forge.</p>
<p>With this out of the way, you can now activate the conda environment</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>activate<span class="w"> </span>pyarrow-dev
</pre></div>
</div>
<p>For Windows, see the <a class="reference internal" href="#building-on-windows">Building on Windows</a> section below.</p>
<p>We need to set some environment variables to let Arrow’s build system know
about our build toolchain:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span><span class="nv">$CONDA_PREFIX</span>
</pre></div>
</div>
</section>
<section id="using-system-and-bundled-dependencies">
<h4>Using system and bundled dependencies<a class="headerlink" href="#using-system-and-bundled-dependencies" title="Permalink to this heading">#</a></h4>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>If you installed Python using the Anaconda distribution or <a class="reference external" href="https://conda.io/miniconda.html">Miniconda</a>, you cannot currently use a
pip-based virtual environment. Please follow the conda-based development
instructions instead.</p>
</div>
<p>If not using conda, you must arrange for your system to provide the required
build tools and dependencies. Note that if some dependencies are absent,
the Arrow C++ build chain may still be able to download and compile them
on the fly, but this will take a longer time than with pre-installed binaries.</p>
<p id="python-homebrew">On macOS, use Homebrew to install all dependencies required for
building Arrow C++:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>brew<span class="w"> </span>update<span class="w"> </span><span class="o">&amp;&amp;</span><span class="w"> </span>brew<span class="w"> </span>bundle<span class="w"> </span>--file<span class="o">=</span>arrow/cpp/Brewfile
</pre></div>
</div>
<p>See <a class="reference internal" href="cpp/building.html#cpp-build-dependency-management"><span class="std std-ref">here</span></a> for a list of dependencies you
may need.</p>
<p>On Debian/Ubuntu, you need the following minimal set of dependencies:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>sudo<span class="w"> </span>apt-get<span class="w"> </span>install<span class="w"> </span>build-essential<span class="w"> </span>cmake<span class="w"> </span>python3-dev
</pre></div>
</div>
<p>Now, let’s create a Python virtual environment with all Python dependencies
in the same folder as the repositories, and a target installation folder:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>python3<span class="w"> </span>-m<span class="w"> </span>venv<span class="w"> </span>pyarrow-dev
<span class="gp">$ </span><span class="nb">source</span><span class="w"> </span>./pyarrow-dev/bin/activate
<span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>arrow/python/requirements-build.txt
<span class="gp">$ </span><span class="c1"># This is the folder where we will install the Arrow libraries during</span>
<span class="gp">$ </span><span class="c1"># development</span>
<span class="gp">$ </span>mkdir<span class="w"> </span>dist
</pre></div>
</div>
<p>If your CMake version is too old on Linux, you could get a newer one via
<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">cmake</span></code>.</p>
<p>We need to set some environment variables to let Arrow’s build system know
about our build toolchain:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span><span class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>/dist
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">LD_LIBRARY_PATH</span><span class="o">=</span><span class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>/dist/lib:<span class="nv">$LD_LIBRARY_PATH</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CMAKE_PREFIX_PATH</span><span class="o">=</span><span class="nv">$ARROW_HOME</span>:<span class="nv">$CMAKE_PREFIX_PATH</span>
</pre></div>
</div>
</section>
</section>
<section id="build-and-test">
<h3>Build and test<a class="headerlink" href="#build-and-test" title="Permalink to this heading">#</a></h3>
<p>Now build the Arrow C++ libraries and install them into the directory we
created above (stored in <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME</span></code>):</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>mkdir<span class="w"> </span>arrow/cpp/build
<span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/cpp/build
<span class="gp">$ </span>cmake<span class="w"> </span>-DCMAKE_INSTALL_PREFIX<span class="o">=</span><span class="nv">$ARROW_HOME</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DCMAKE_INSTALL_LIBDIR<span class="o">=</span>lib<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DCMAKE_BUILD_TYPE<span class="o">=</span>Debug<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_BUILD_TESTS<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_COMPUTE<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_CSV<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_DATASET<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_FILESYSTEM<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_HDFS<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_JSON<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_PARQUET<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_BROTLI<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_BZ2<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_LZ4<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_SNAPPY<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_ZLIB<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_ZSTD<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DPARQUET_REQUIRE_ENCRYPTION<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>..
<span class="gp">$ </span>make<span class="w"> </span>-j4
<span class="gp">$ </span>make<span class="w"> </span>install
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>There are a number of optional components that can be switched ON by
adding flags with <code class="docutils literal notranslate"><span class="pre">ON</span></code>:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_CUDA</span></code>: Support for CUDA-enabled GPUs</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_DATASET</span></code>: Support for Apache Arrow Dataset</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_FLIGHT</span></code>: Flight RPC framework</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_GANDIVA</span></code>: LLVM-based expression compiler</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_ORC</span></code>: Support for Apache ORC file format</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">ARROW_PARQUET</span></code>: Support for Apache Parquet file format</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">PARQUET_REQUIRE_ENCRYPTION</span></code>: Support for Parquet Modular Encryption</p></li>
</ul>
<p>Anything set to <code class="docutils literal notranslate"><span class="pre">ON</span></code> above can also be turned off. Note that some compression
libraries are recommended for full Parquet support.</p>
<p>You may choose between different kinds of C++ build types:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=Release</span></code> (the default) produces a build with optimizations
enabled and debugging information disabled;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=Debug</span></code> produces a build with optimizations
disabled and debugging information enabled;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=RelWithDebInfo</span></code> produces a build with both optimizations
and debugging information enabled.</p></li>
</ul>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="cpp/building.html#cpp-building-building"><span class="std std-ref">Building Arrow C++</span></a>.</p>
</div>
<p>If multiple versions of Python are installed in your environment, you may have
to pass additional parameters to CMake so that it can find the right
executable, headers and libraries. For example, specifying
<code class="docutils literal notranslate"><span class="pre">-DPython3_EXECUTABLE=&lt;path/to/bin/python&gt;</span></code> lets CMake choose the
Python executable which you are using.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>On Linux systems with support for building on multiple architectures,
<code class="docutils literal notranslate"><span class="pre">make</span></code> may install libraries in the <code class="docutils literal notranslate"><span class="pre">lib64</span></code> directory by default. For
this reason we recommend passing <code class="docutils literal notranslate"><span class="pre">-DCMAKE_INSTALL_LIBDIR=lib</span></code> because the
Python build scripts assume the library directory is <code class="docutils literal notranslate"><span class="pre">lib</span></code>.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you have conda installed but are not using it to manage dependencies,
and you have trouble building the C++ library, you may need to set
<code class="docutils literal notranslate"><span class="pre">-DARROW_DEPENDENCY_SOURCE=AUTO</span></code> or some other value (described
<a class="reference internal" href="cpp/building.html#cpp-build-dependency-management"><span class="std std-ref">here</span></a>)
to explicitly tell CMake not to use conda.</p>
</div>
<p>For any other C++ build challenges, see <a class="reference internal" href="cpp/index.html#cpp-development"><span class="std std-ref">C++ Development</span></a>.</p>
<p>If you need to rebuild the C++ part because of errors during the build, it is
advisable to first delete the build folder with the command <code class="docutils literal notranslate"><span class="pre">rm</span> <span class="pre">-rf</span> <span class="pre">arrow/cpp/build</span></code>.
If the build completed successfully and you only need to rebuild after pulling the
latest changes from git main, then this step is not needed.</p>
<p>Now, build pyarrow:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/python
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PYARROW_WITH_PARQUET</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PYARROW_WITH_DATASET</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PYARROW_PARALLEL</span><span class="o">=</span><span class="m">4</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>If you did build one of the optional components (in C++), you need to set the
corresponding <code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_$COMPONENT</span></code> environment variable to 1.</p>
<p>Similarly, if you built with <code class="docutils literal notranslate"><span class="pre">PARQUET_REQUIRE_ENCRYPTION</span></code> (in C++), you
need to set the corresponding <code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_PARQUET_ENCRYPTION</span></code> environment
variable to 1.</p>
<p>To set the number of threads used to compile PyArrow’s C++/Cython components,
set the <code class="docutils literal notranslate"><span class="pre">PYARROW_PARALLEL</span></code> environment variable.</p>
<p>If you wish to delete stale PyArrow build artifacts before rebuilding, navigate
to the <code class="docutils literal notranslate"><span class="pre">arrow/python</span></code> folder and run <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">clean</span> <span class="pre">-Xfd</span> <span class="pre">.</span></code>.</p>
<p>By default, PyArrow will be built in release mode even if Arrow C++ has been
built in debug mode. To create a debug build of PyArrow, run
<code class="docutils literal notranslate"><span class="pre">export</span> <span class="pre">PYARROW_BUILD_TYPE=debug</span></code> prior to running <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">setup.py</span>
<span class="pre">build_ext</span> <span class="pre">--inplace</span></code> above. A <code class="docutils literal notranslate"><span class="pre">relwithdebinfo</span></code> build can be created
similarly.</p>
<p>Now you are ready to install test dependencies and run <a class="reference internal" href="#unit-testing">Unit Testing</a>, as
described above.</p>
<p>To build a self-contained wheel (including the Arrow and Parquet C++
libraries), one can set <code class="docutils literal notranslate"><span class="pre">--bundle-arrow-cpp</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>wheel<span class="w"> </span><span class="c1"># if not installed</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--build-type<span class="o">=</span><span class="nv">$ARROW_BUILD_TYPE</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--bundle-arrow-cpp<span class="w"> </span>bdist_wheel
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>To install an editable PyArrow build run <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">-e</span> <span class="pre">.</span> <span class="pre">--no-build-isolation</span></code>
in the <code class="docutils literal notranslate"><span class="pre">arrow/python</span></code> directory.</p>
</div>
<section id="docker-examples">
<h4>Docker examples<a class="headerlink" href="#docker-examples" title="Permalink to this heading">#</a></h4>
<p>If you are having difficulty building the Python library from source, take a
look at the <code class="docutils literal notranslate"><span class="pre">python/examples/minimal_build</span></code> directory which illustrates a
complete build and test from source both with the conda- and pip-based build
methods.</p>
</section>
</section>
<section id="debugging">
<h3>Debugging<a class="headerlink" href="#debugging" title="Permalink to this heading">#</a></h3>
<p>Since pyarrow depends on the Arrow C++ libraries, debugging can
frequently involve crossing between Python and C++ shared libraries.
For the best experience, make sure you’ve built both Arrow C++
(<code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=Debug</span></code>) and PyArrow (<code class="docutils literal notranslate"><span class="pre">export</span> <span class="pre">PYARROW_BUILD_TYPE=debug</span></code>)
in debug mode.</p>
<section id="using-gdb-on-linux">
<h4>Using gdb on Linux<a class="headerlink" href="#using-gdb-on-linux" title="Permalink to this heading">#</a></h4>
<p>To debug the C++ libraries with gdb while running the Python unit
tests, first start pytest with gdb:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>gdb<span class="w"> </span>--args<span class="w"> </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow/tests/test_to_run.py<span class="w"> </span>-k<span class="w"> </span><span class="nv">$TEST_TO_MATCH</span>
</pre></div>
</div>
<p>To set a breakpoint, use the same gdb syntax that you would when
debugging a C++ program, for example:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp gp-VirtualEnv">(gdb)</span> <span class="go">b src/arrow/python/arrow_to_pandas.cc:1874</span>
<span class="go">No source file named src/arrow/python/arrow_to_pandas.cc.</span>
<span class="go">Make breakpoint pending on future shared library load? (y or [n]) y</span>
<span class="go">Breakpoint 1 (src/arrow/python/arrow_to_pandas.cc:1874) pending.</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>The <a class="reference internal" href="../cpp/gdb.html#cpp-gdb-extension"><span class="std std-ref">GDB extension for Arrow C++</span></a>.</p>
</div>
</section>
</section>
</section>
<section id="building-on-windows">
<span id="build-pyarrow-win"></span><h2>Building on Windows<a class="headerlink" href="#building-on-windows" title="Permalink to this heading">#</a></h2>
<p>Building on Windows requires one of the following compilers to be installed:</p>
<ul class="simple">
<li><p><a class="reference external" href="https://download.visualstudio.microsoft.com/download/pr/3e542575-929e-4297-b6c6-bef34d0ee648/639c868e1219c651793aff537a1d3b77/vs_buildtools.exe">Build Tools for Visual Studio 2017</a></p></li>
<li><p>Visual Studio 2017</p></li>
</ul>
<p>During the setup of Build Tools, ensure at least one Windows SDK is selected.</p>
<p>We bootstrap a conda environment similar to above, but skipping some of the
Linux/macOS-only packages:</p>
<p>First, starting from a fresh clone of Apache Arrow:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/apache/arrow.git
</pre></div>
</div>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>create<span class="w"> </span>-y<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>-c<span class="w"> </span>conda-forge<span class="w"> </span>^
<span class="go"> --file arrow\ci\conda_env_cpp.txt ^</span>
<span class="go"> --file arrow\ci\conda_env_python.txt ^</span>
<span class="go"> --file arrow\ci\conda_env_gandiva.txt ^</span>
<span class="go"> python=3.10</span>
<span class="gp">$ </span>conda<span class="w"> </span>activate<span class="w"> </span>pyarrow-dev
</pre></div>
</div>
<p>Now, we build and install Arrow C++ libraries.</p>
<p>We set the path of the installation directory of the Arrow C++ libraries as
<code class="docutils literal notranslate"><span class="pre">ARROW_HOME</span></code>. When using a conda environment, Arrow C++ is installed
in the environment directory, whose path is saved in the
<a class="reference external" href="https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html#environment-variables-that-affect-the-build-process">CONDA_PREFIX</a>
environment variable.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span>%CONDA_PREFIX%<span class="se">\L</span>ibrary
</pre></div>
</div>
<p>Let’s configure, build and install the Arrow C++ libraries:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>mkdir<span class="w"> </span>arrow<span class="se">\c</span>pp<span class="se">\b</span>uild
<span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow<span class="se">\c</span>pp<span class="se">\b</span>uild
<span class="gp">$ </span>cmake<span class="w"> </span>-G<span class="w"> </span><span class="s2">&quot;Ninja&quot;</span><span class="w"> </span>^
<span class="go"> -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^</span>
<span class="go"> -DCMAKE_UNITY_BUILD=ON ^</span>
<span class="go"> -DARROW_COMPUTE=ON ^</span>
<span class="go"> -DARROW_CSV=ON ^</span>
<span class="go"> -DARROW_CXXFLAGS=&quot;/WX /MP&quot; ^</span>
<span class="go"> -DARROW_DATASET=ON ^</span>
<span class="go"> -DARROW_FILESYSTEM=ON ^</span>
<span class="go"> -DARROW_HDFS=ON ^</span>
<span class="go"> -DARROW_JSON=ON ^</span>
<span class="go"> -DARROW_PARQUET=ON ^</span>
<span class="go"> -DARROW_WITH_LZ4=ON ^</span>
<span class="go"> -DARROW_WITH_SNAPPY=ON ^</span>
<span class="go"> -DARROW_WITH_ZLIB=ON ^</span>
<span class="go"> -DARROW_WITH_ZSTD=ON ^</span>
<span class="go"> ..</span>
<span class="gp">$ </span>cmake<span class="w"> </span>--build<span class="w"> </span>.<span class="w"> </span>--target<span class="w"> </span>install<span class="w"> </span>--config<span class="w"> </span>Release
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>Now, we can build pyarrow:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow<span class="se">\p</span>ython
<span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">PYARROW_WITH_PARQUET</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">CONDA_DLL_SEARCH_MODIFICATION_ENABLE</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>For building pyarrow, the above defined environment variables need to also
be set. Remember this if you want to re-build <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code> after your initial build.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you are using Conda with Python 3.9 or earlier, you must
set <code class="docutils literal notranslate"><span class="pre">CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1</span></code>.</p>
</div>
<p>Then run the unit tests with:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow<span class="se">\p</span>ython
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>With the above instructions the Arrow C++ libraries are not bundled with
the Python extension. This is recommended for development as it allows the
C++ libraries to be re-built separately.</p>
<p>If you are using the conda package manager then conda will ensure the Arrow C++
libraries are found. In case you are <em>not</em> using conda then you have to:</p>
<ul class="simple">
<li><p>add the path of installed DLL libraries to <code class="docutils literal notranslate"><span class="pre">PATH</span></code> every time before
importing <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>, or</p></li>
<li><p>bundle the Arrow C++ libraries with <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>.</p></li>
</ul>
<p>If you want to bundle the Arrow C++ libraries with <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>, set the
<code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_ARROW_CPP</span></code> environment variable before building <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">PYARROW_BUNDLE_ARROW_CPP</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
</pre></div>
</div>
<p>Note that bundled Arrow C++ libraries will not be automatically
updated when rebuilding Arrow C++.</p>
</div>
<section id="caveats">
<h3>Caveats<a class="headerlink" href="#caveats" title="Permalink to this heading">#</a></h3>
</section>
</section>
<section id="relevant-components-and-environment-variables">
<h2>Relevant components and environment variables<a class="headerlink" href="#relevant-components-and-environment-variables" title="Permalink to this heading">#</a></h2>
<p>The relevant Arrow CMake flags and the corresponding environment variables
to be used when building PyArrow are:</p>
<table class="table">
<colgroup>
<col style="width: 50.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Arrow flags/options</p></th>
<th class="head"><p>Corresponding environment variables for PyArrow</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">CMAKE_BUILD_TYPE</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUILD_TYPE</span></code> (release, debug or relwithdebinfo)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_GCS</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_GCS</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_S3</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_S3</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_HDFS</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_HDFS</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_CUDA</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_CUDA</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_SUBSTRAIT</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_SUBSTRAIT</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_FLIGHT</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_FLIGHT</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_DATASET</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_DATASET</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_PARQUET</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_PARQUET</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PARQUET_REQUIRE_ENCRYPTION</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_PARQUET_ENCRYPTION</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_TENSORFLOW</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_TENSORFLOW</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_ORC</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_ORC</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_GANDIVA</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_GANDIVA</span></code></p></td>
</tr>
</tbody>
</table>
<p>Other relevant environment variables that can be used when building
PyArrow are:</p>
<table class="table">
<colgroup>
<col style="width: 33.3%" />
<col style="width: 33.3%" />
<col style="width: 33.3%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>PyArrow environment variable</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Default value</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CMAKE_GENERATOR</span></code></p></td>
<td><p>Example: <code class="docutils literal notranslate"><span class="pre">'Visual</span> <span class="pre">Studio</span> <span class="pre">15</span> <span class="pre">2017</span> <span class="pre">Win64'</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CMAKE_OPTIONS</span></code></p></td>
<td><p>Extra CMake and Arrow options (ex. <code class="docutils literal notranslate"><span class="pre">&quot;-DARROW_SIMD_LEVEL=NONE</span> <span class="pre">-DCMAKE_OSX_ARCHITECTURES=x86_64;arm64&quot;</span></code>)</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CXXFLAGS</span></code></p></td>
<td><p>Extra C++ compiler flags</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_GENERATE_COVERAGE</span></code></p></td>
<td><p>Set the <code class="docutils literal notranslate"><span class="pre">Xlinetrace</span></code> flag to true for the Cython compiler</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">false</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_ARROW_CPP</span></code></p></td>
<td><p>Bundle the Arrow C++ libraries</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_CYTHON_CPP</span></code></p></td>
<td><p>Bundle the C++ files generated by Cython</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_INSTALL_TESTS</span></code></p></td>
<td><p>Add the tests to the Python package</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">1</span></code> (<code class="docutils literal notranslate"><span class="pre">ON</span></code>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUILD_VERBOSE</span></code></p></td>
<td><p>Enable verbose output from Makefile builds</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_PARALLEL</span></code></p></td>
<td><p>Number of processes used to compile PyArrow’s C++/Cython components</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
</tbody>
</table>
</section>
<section id="deleting-stale-build-artifacts">
<h2>Deleting stale build artifacts<a class="headerlink" href="#deleting-stale-build-artifacts" title="Permalink to this heading">#</a></h2>
<p>When there have been changes to the structure of the Arrow C++ library or PyArrow,
a thorough cleaning is recommended as a first attempt at fixing build errors.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>It is not necessarily intuitive from the error itself that the problem is due to stale artifacts.
An example of a build error caused by stale artifacts is “Unknown CMake command “arrow_keep_backward_compatibility””.</p>
</div>
<p>To delete stale Arrow C++ build artifacts:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>rm<span class="w"> </span>-rf<span class="w"> </span>arrow/cpp/build
</pre></div>
</div>
<p>To delete stale PyArrow build artifacts:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clean<span class="w"> </span>-Xfd<span class="w"> </span>python
</pre></div>
</div>
<p>If using a Conda environment, there are some build artifacts that get installed in
<code class="docutils literal notranslate"><span class="pre">$ARROW_HOME</span></code> (aka <code class="docutils literal notranslate"><span class="pre">$CONDA_PREFIX</span></code>). For example, <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/lib/cmake/Arrow*</span></code>,
<code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/include/arrow</span></code>, <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/lib/libarrow*</span></code>, etc.</p>
<p>These files can be manually deleted. If unsure which files to erase, one approach
is to recreate the Conda environment.</p>
<p>Either delete the current one, and start fresh:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>deactivate
<span class="gp">$ </span>conda<span class="w"> </span>remove<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>--all
</pre></div>
</div>
<p>Or, less destructively, create a different environment with a different name.</p>
</section>
<section id="installing-nightly-packages">
<h2>Installing Nightly Packages<a class="headerlink" href="#installing-nightly-packages" title="Permalink to this heading">#</a></h2>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>These packages are not official releases. Use them at your own risk.</p>
</div>
<p>PyArrow has nightly wheels and Conda packages for testing purposes.</p>
<p>These may be suitable for downstream libraries in their continuous integration
setup to maintain compatibility with the upcoming PyArrow features,
deprecations and/or feature removals.</p>
<p>Install the development version of PyArrow from the <a class="reference external" href="https://anaconda.org/arrow-nightlies/pyarrow">arrow-nightlies</a> conda channel:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>conda<span class="w"> </span>install<span class="w"> </span>-c<span class="w"> </span>arrow-nightlies<span class="w"> </span>pyarrow
</pre></div>
</div>
<p>Note that this requires using the <code class="docutils literal notranslate"><span class="pre">conda-forge</span></code> channel for all other
packages (<code class="docutils literal notranslate"><span class="pre">conda</span> <span class="pre">config</span> <span class="pre">--add</span> <span class="pre">channels</span> <span class="pre">conda-forge</span></code>).</p>
<p>Install the development version from an <a class="reference external" href="https://gemfury.com/arrow-nightlies">alternative PyPI</a> index:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>--extra-index-url<span class="w"> </span>https://pypi.fury.io/arrow-nightlies/<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--prefer-binary<span class="w"> </span>--pre<span class="w"> </span>pyarrow
</pre></div>
</div>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="java/development.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Development Guidelines</p>
</div>
</a>
<a class="right-next"
href="continuous_integration/index.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Continuous Integration</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#coding-style">Coding Style</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#unit-testing">Unit Testing</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#test-groups">Test Groups</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#doctest">Doctest</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#benchmarking">Benchmarking</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#building-on-linux-and-macos">Building on Linux and macOS</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#system-requirements">System Requirements</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#environment-setup-and-build">Environment Setup and Build</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#using-conda">Using Conda</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#using-system-and-bundled-dependencies">Using system and bundled dependencies</a></li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#build-and-test">Build and test</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#docker-examples">Docker examples</a></li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#debugging">Debugging</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#using-gdb-on-linux">Using gdb on Linux</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#building-on-windows">Building on Windows</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#caveats">Caveats</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#relevant-components-and-environment-variables">Relevant components and environment variables</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#deleting-stale-build-artifacts">Deleting stale build artifacts</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#installing-nightly-packages">Installing Nightly Packages</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/developers/python.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>