blob: a93fe5685e808b7f64a034057a33e115e401b996 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="../../" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Building PyArrow &#8212; Apache Arrow v23.0.0.dev37</title>
<script data-cfasync="false">
// Copy the stored color-mode preferences onto the root element before any
// stylesheet loads; a missing entry falls back to an empty string.
for (const key of ["mode", "theme"]) {
document.documentElement.dataset[key] = localStorage.getItem(key) || "";
}
</script>
<!--
This gives us a CSS class that will be invisible only if JS is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=03e43079" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../_static/sphinx-design.min.css?v=95c83b7e" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css?v=8dcd28dc" />
<!-- So that users can add custom icons -->
<script src="../../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../_static/documentation_options.js?v=9fc6757a"></script>
<script src="../../_static/doctools.js?v=9bcbadda"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/clipboard.min.js?v=a7894cd8"></script>
<script src="../../_static/copybutton.js?v=3bb21c8c"></script>
<script src="../../_static/design-tabs.js?v=f930bc37"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'developers/python/building';</script>
<script>
// Attach the theme-related settings to the shared DOCUMENTATION_OPTIONS object
// in one step (same keys, same values, same assignment order).
Object.assign(DOCUMENTATION_OPTIONS, {
theme_version: '0.16.1',
theme_switcher_json_url: '/docs/_static/versions.json',
theme_switcher_version_match: 'dev/',
show_version_warning_banner: true,
});
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/developers/python/building.html" />
<link rel="icon" href="../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Developing PyArrow" href="development.html" />
<link rel="prev" title="Python Development" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="23.0.0.dev37" />
<!-- Matomo -->
<script>
// Reuse the page-wide _paq command queue if one already exists; otherwise create it.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL that both the tracking endpoint and the tracker script are built from.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
// Create a new script element and look up the first script element already in the document.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
// Flag the new script as async, point it at matomo.js, and insert it before that first script.
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class=" navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../index.html">
<img src="../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v23.0.0.dev37 - Home"/>
<img src="../../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v23.0.0.dev37 - Home"/>
</a></div>
</div>
<div class=" navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../bug_reports.html">Bug reports and feature requests</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../guide/index.html">New Contributor’s Guide</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../guide/architectural_overview.html">Architectural Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../guide/communication.html">Communication</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../guide/step_by_step/index.html">Steps in making your first PR</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/set_up.html">Set up</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/building.html">Building the Arrow libraries 🏋🏿‍♀️</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/finding_issues.html">Finding good first issues 🔎</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/arrow_codebase.html">Working on the Arrow codebase 🧐</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/testing.html">Testing 🧪</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/styling.html">Styling 😎</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/step_by_step/pr_lifecycle.html">Lifecycle of a pull request</a></li>
</ul>
</details></li>
<li class="toctree-l2"><a class="reference internal" href="../guide/documentation.html">Helping with documentation</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../guide/tutorials/index.html">Tutorials</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l3"><a class="reference internal" href="../guide/tutorials/python_tutorial.html">Python tutorial</a></li>
<li class="toctree-l3"><a class="reference internal" href="../guide/tutorials/r_tutorial.html">R tutorials</a></li>
</ul>
</details></li>
<li class="toctree-l2"><a class="reference internal" href="../guide/resources.html">Additional information and resources</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html">Contributing Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reviewing.html">Reviewing contributions</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../cpp/index.html">C++ Development</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../cpp/building.html">Building Arrow C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/development.html">Development Guidelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/windows.html">Developing on Windows</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/emscripten.html">Cross compiling for WebAssembly with Emscripten</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/fuzzing.html">Fuzzing Arrow C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cpp/compute.html">Developing Arrow C++ Compute</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../cpp/acero.html">Developing Acero</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l3"><a class="reference internal" href="../cpp/acero/swiss_table.html">Swiss Table</a></li>
</ul>
</details></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../java/index.html">Java Development</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../java/building.html">Building Arrow Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../java/development.html">Development Guidelines</a></li>
</ul>
</details></li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="index.html">Python Development</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Building PyArrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="development.html">Developing PyArrow</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../continuous_integration/index.html">Continuous Integration</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../continuous_integration/overview.html">Continuous Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../continuous_integration/docker.html">Running Docker Builds</a></li>
<li class="toctree-l2"><a class="reference internal" href="../continuous_integration/archery.html">Daily Development using Archery</a></li>
<li class="toctree-l2"><a class="reference internal" href="../continuous_integration/crossbow.html">Packaging and Testing with Crossbow</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="../documentation.html">Building the Documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../release.html">Release Management Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../release_verification.html">Release Verification Process</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
<div class="sidebar-primary-item">
<div id="ethical-ad-placement"
class="flat"
data-ea-publisher="readthedocs"
data-ea-type="readthedocs-sidebar"
data-ea-manual="true">
</div></div>
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../index.html" class="nav-link">Development</a></li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Python Development</a></li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Building PyArrow</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="building-pyarrow">
<span id="build-pyarrow"></span><h1>Building PyArrow<a class="headerlink" href="#building-pyarrow" title="Link to this heading">#</a></h1>
<p>This page provides source build instructions for PyArrow for all platforms.</p>
<section id="system-requirements">
<h2>System Requirements<a class="headerlink" href="#system-requirements" title="Link to this heading">#</a></h2>
<div class="sd-tab-set docutils">
<input checked="checked" id="sd-tab-item-0" name="sd-tab-set-0" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="linux-macos" for="sd-tab-item-0">
Linux and macOS</label><div class="sd-tab-content docutils">
<p>On macOS, any modern Xcode or Xcode Command Line Tools (<code class="docutils literal notranslate"><span class="pre">xcode-select</span> <span class="pre">--install</span></code>)
are sufficient.</p>
<p>On Linux, for this guide, we require a minimum of gcc or clang 9.
You can check your version by running:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>gcc<span class="w"> </span>--version
</pre></div>
</div>
<p>If the system compiler is older than gcc 9, it can be set to a newer version
using the <code class="docutils literal notranslate"><span class="pre">$CC</span></code> and <code class="docutils literal notranslate"><span class="pre">$CXX</span></code> environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CC</span><span class="o">=</span>gcc-9
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CXX</span><span class="o">=</span>g++-9
</pre></div>
</div>
</div>
<input id="sd-tab-item-1" name="sd-tab-set-0" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="wins" for="sd-tab-item-1">
Windows</label><div class="sd-tab-content docutils">
<p>Building on Windows requires one of the following compilers to be
installed:</p>
<ul class="simple">
<li><p><a class="reference external" href="https://aka.ms/vs/17/release/vs_BuildTools.exe">Build Tools for Visual Studio 2022</a> or</p></li>
<li><p>Visual Studio 2022</p></li>
</ul>
<p>During the setup of Build Tools, ensure at least one Windows SDK
is selected.</p>
</div>
</div>
</section>
<section id="environment-setup">
<h2>Environment setup<a class="headerlink" href="#environment-setup" title="Link to this heading">#</a></h2>
<p>First, start from a fresh clone of Apache Arrow:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/apache/arrow.git
</pre></div>
</div>
<p>There are two supported ways to set up the build environment for PyArrow: using
<strong>Conda</strong> to manage the dependencies or using <strong>pip</strong> with manual dependency
management.</p>
<p>Both methods are shown below for Linux and macOS. For Windows, only the
Conda-based setup is currently documented, skipping some of the
Linux/macOS-only packages.</p>
<p>Note that in case you are not using conda on a Windows platform, Arrow C++
libraries need to be bundled with <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>. For extra information see the
Windows tab under the <a class="reference internal" href="#pyarrow-build-section"><span class="std std-ref">Build PyArrow</span></a> section.</p>
<div class="sd-tab-set docutils">
<input checked="checked" id="sd-tab-item-2" name="sd-tab-set-1" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="linux-macos" for="sd-tab-item-2">
Linux and macOS using conda</label><div class="sd-tab-content docutils">
<p>Pull in the test data and set up the environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow
<span class="gp">$ </span>git<span class="w"> </span>submodule<span class="w"> </span>update<span class="w"> </span>--init
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PARQUET_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/cpp/submodules/parquet-testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p>The <a class="reference external" href="https://conda.io/">conda</a> package manager allows installing build-time
dependencies for Arrow C++ and PyArrow as pre-built binaries, which can make
Arrow development easier and faster.</p>
<p>Let’s create a conda environment with all the C++ build and Python dependencies
from conda-forge, targeting development for Python 3.13:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>create<span class="w"> </span>-y<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>-c<span class="w"> </span>conda-forge<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_unix.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_cpp.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_python.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--file<span class="w"> </span>arrow/ci/conda_env_gandiva.txt<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>compilers<span class="w"> </span><span class="se">\</span>
<span class="w"> </span><span class="nv">python</span><span class="o">=</span><span class="m">3</span>.13<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>pandas
</pre></div>
</div>
<p>As of January 2019, the <code class="docutils literal notranslate"><span class="pre">compilers</span></code> package is needed on many Linux
distributions to use packages from conda-forge.</p>
<p>With this out of the way, you can now activate the conda environment:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>activate<span class="w"> </span>pyarrow-dev
</pre></div>
</div>
<p>We need to set some environment variables to let Arrow’s build system know
about our build toolchain:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span><span class="nv">$CONDA_PREFIX</span>
</pre></div>
</div>
</div>
<input id="sd-tab-item-3" name="sd-tab-set-1" type="radio">
<label class="sd-tab-label" for="sd-tab-item-3">
Linux and macOS using pip</label><div class="sd-tab-content docutils">
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>If you installed Python using the Anaconda distribution or <a class="reference external" href="https://conda.io/miniconda.html">Miniconda</a>, you cannot currently use a
pip-based virtual environment. Please follow the conda-based development
instructions instead.</p>
</div>
<p>Pull in the test data and set up the environment variables:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow
<span class="gp">$ </span>git<span class="w"> </span>submodule<span class="w"> </span>update<span class="w"> </span>--init
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">PARQUET_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/cpp/submodules/parquet-testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_TEST_DATA</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">${</span><span class="nv">PWD</span><span class="si">}</span><span class="s2">/testing/data&quot;</span>
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<p><strong>Using system and bundled dependencies</strong></p>
<p>If not using conda, you must arrange for your system to provide the required
build tools and dependencies. Note that if some dependencies are absent,
the Arrow C++ build chain may still be able to download and compile them
on the fly, but this will take a longer time than with pre-installed binaries.</p>
<p>On macOS, use Homebrew to install all dependencies required for
building Arrow C++:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>brew<span class="w"> </span>update<span class="w"> </span><span class="o">&amp;&amp;</span><span class="w"> </span>brew<span class="w"> </span>bundle<span class="w"> </span>--file<span class="o">=</span>arrow/cpp/Brewfile
</pre></div>
</div>
<p>See <a class="reference internal" href="../cpp/building.html#cpp-build-dependency-management"><span class="std std-ref">here</span></a> for a list of dependencies you
may need.</p>
<p>On Debian/Ubuntu, you need the following minimal set of dependencies:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>sudo<span class="w"> </span>apt-get<span class="w"> </span>install<span class="w"> </span>build-essential<span class="w"> </span>ninja-build<span class="w"> </span>cmake<span class="w"> </span>python3-dev
</pre></div>
</div>
<p>Now, let’s create a Python virtual environment with all Python dependencies
in the same folder as the repositories, and a target installation folder:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>python3<span class="w"> </span>-m<span class="w"> </span>venv<span class="w"> </span>pyarrow-dev
<span class="gp">$ </span><span class="nb">source</span><span class="w"> </span>./pyarrow-dev/bin/activate
<span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>arrow/python/requirements-build.txt
<span class="gp">$ </span><span class="c1"># This is the folder where we will install the Arrow libraries during</span>
<span class="gp">$ </span><span class="c1"># development</span>
<span class="gp">$ </span>mkdir<span class="w"> </span>dist
</pre></div>
</div>
<p>If your CMake version is too old on Linux, you could get a newer one via
<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">cmake</span></code>.</p>
<p>We need to set some environment variables to let Arrow’s build system know
about our build toolchain:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span><span class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>/dist
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">LD_LIBRARY_PATH</span><span class="o">=</span><span class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>/dist/lib:<span class="nv">$LD_LIBRARY_PATH</span>
<span class="gp">$ </span><span class="nb">export</span><span class="w"> </span><span class="nv">CMAKE_PREFIX_PATH</span><span class="o">=</span><span class="nv">$ARROW_HOME</span>:<span class="nv">$CMAKE_PREFIX_PATH</span>
</pre></div>
</div>
</div>
<input id="sd-tab-item-4" name="sd-tab-set-1" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="wins" for="sd-tab-item-4">
Windows</label><div class="sd-tab-content docutils">
<p>Let’s create a conda environment with all the C++ build and Python dependencies
from conda-forge, targeting development for Python 3.13:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>create<span class="w"> </span>-y<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>-c<span class="w"> </span>conda-forge<span class="w"> </span>^
<span class="go"> --file arrow\ci\conda_env_cpp.txt ^</span>
<span class="go"> --file arrow\ci\conda_env_python.txt ^</span>
<span class="go"> --file arrow\ci\conda_env_gandiva.txt ^</span>
<span class="go"> python=3.13</span>
<span class="gp">$ </span>conda<span class="w"> </span>activate<span class="w"> </span>pyarrow-dev
</pre></div>
</div>
<p>Now, we can build and install Arrow C++ libraries.</p>
<p>We set the path of the installation directory of the Arrow C++
libraries as <code class="docutils literal notranslate"><span class="pre">ARROW_HOME</span></code>. When using a conda environment,
Arrow C++ is installed in the environment directory, whose path
is saved in the <a class="reference external" href="https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html#environment-variables-that-affect-the-build-process">CONDA_PREFIX</a>
environment variable.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">ARROW_HOME</span><span class="o">=</span>%CONDA_PREFIX%<span class="se">\L</span>ibrary
</pre></div>
</div>
</div>
</div>
</section>
<section id="build">
<h2>Build<a class="headerlink" href="#build" title="Link to this heading">#</a></h2>
<p>First we need to configure, build and install the Arrow C++ libraries.
Then we can build PyArrow.</p>
<section id="build-c">
<h3>Build C++<a class="headerlink" href="#build-c" title="Link to this heading">#</a></h3>
<div class="sd-tab-set docutils">
<input checked="checked" id="sd-tab-item-5" name="sd-tab-set-2" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="linux-macos" for="sd-tab-item-5">
Linux and macOS</label><div class="sd-tab-content docutils">
<p>Now build the Arrow C++ libraries and install them into the directory we
created above (stored in <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME</span></code>):</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>cmake<span class="w"> </span>-S<span class="w"> </span>arrow/cpp<span class="w"> </span>-B<span class="w"> </span>arrow/cpp/build<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DCMAKE_INSTALL_PREFIX<span class="o">=</span><span class="nv">$ARROW_HOME</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--preset<span class="w"> </span>ninja-release-python
<span class="gp">$ </span>cmake<span class="w"> </span>--build<span class="w"> </span>arrow/cpp/build<span class="w"> </span>--target<span class="w"> </span>install
</pre></div>
</div>
<p><strong>About presets</strong></p>
<p><code class="docutils literal notranslate"><span class="pre">ninja-release-python</span></code> is not the only preset available - if you would like a
build with more features like CUDA, Flight and Gandiva support you may opt for
the <code class="docutils literal notranslate"><span class="pre">ninja-release-python-maximal</span></code> preset. If you wanted fewer features (i.e.
removing ORC and dataset support), you could opt for
<code class="docutils literal notranslate"><span class="pre">ninja-release-python-minimal</span></code>. Changing the word <code class="docutils literal notranslate"><span class="pre">release</span></code> to <code class="docutils literal notranslate"><span class="pre">debug</span></code>
with any of the aforementioned presets will generate a debug build of Arrow.</p>
<p><strong>Individual components</strong></p>
<p>The presets are provided as a convenience, but you may instead opt to
specify the individual components:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>cmake<span class="w"> </span>-S<span class="w"> </span>arrow/cpp<span class="w"> </span>-B<span class="w"> </span>arrow/cpp/build<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DCMAKE_INSTALL_PREFIX<span class="o">=</span><span class="nv">$ARROW_HOME</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DCMAKE_BUILD_TYPE<span class="o">=</span>Debug<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_BUILD_TESTS<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_COMPUTE<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_CSV<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_DATASET<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_FILESYSTEM<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_HDFS<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_JSON<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_PARQUET<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_BROTLI<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_BZ2<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_LZ4<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_SNAPPY<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_ZLIB<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DARROW_WITH_ZSTD<span class="o">=</span>ON<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-DPARQUET_REQUIRE_ENCRYPTION<span class="o">=</span>ON
<span class="gp">$ </span>cmake<span class="w"> </span>--build<span class="w"> </span>arrow/cpp/build<span class="w"> </span>--target<span class="w"> </span>install<span class="w"> </span>-j4
</pre></div>
</div>
<p>If multiple versions of Python are installed in your environment, you may have
to pass additional parameters to CMake so that it can find the right
executable, headers and libraries. For example, specifying
<code class="docutils literal notranslate"><span class="pre">-DPython3_EXECUTABLE=&lt;path/to/bin/python&gt;</span></code> lets CMake choose the
Python executable which you are using.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>On Linux systems with support for building on multiple architectures,
<code class="docutils literal notranslate"><span class="pre">make</span></code> may install libraries in the <code class="docutils literal notranslate"><span class="pre">lib64</span></code> directory by default. For
this reason we recommend passing <code class="docutils literal notranslate"><span class="pre">-DCMAKE_INSTALL_LIBDIR=lib</span></code> because the
Python build scripts assume the library directory is <code class="docutils literal notranslate"><span class="pre">lib</span></code>.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you have conda installed but are not using it to manage dependencies,
and you have trouble building the C++ library, you may need to set
<code class="docutils literal notranslate"><span class="pre">-DARROW_DEPENDENCY_SOURCE=AUTO</span></code> or some other value (described
<a class="reference internal" href="../cpp/building.html#cpp-build-dependency-management"><span class="std std-ref">here</span></a>)
to explicitly tell CMake not to use conda.</p>
</div>
</div>
<input id="sd-tab-item-6" name="sd-tab-set-2" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="wins" for="sd-tab-item-6">
Windows</label><div class="sd-tab-content docutils">
<p>There are presets provided as a convenience for building C++ (see Linux and macOS
tab). Here we will instead specify the individual components:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>mkdir<span class="w"> </span>arrow<span class="se">\c</span>pp<span class="se">\b</span>uild
<span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow<span class="se">\c</span>pp<span class="se">\b</span>uild
<span class="gp">$ </span>cmake<span class="w"> </span>-G<span class="w"> </span><span class="s2">&quot;Ninja&quot;</span><span class="w"> </span>^
<span class="go"> -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^</span>
<span class="go"> -DCMAKE_UNITY_BUILD=ON ^</span>
<span class="go"> -DARROW_COMPUTE=ON ^</span>
<span class="go"> -DARROW_CSV=ON ^</span>
<span class="go"> -DARROW_CXXFLAGS=&quot;/WX /MP&quot; ^</span>
<span class="go"> -DARROW_DATASET=ON ^</span>
<span class="go"> -DARROW_FILESYSTEM=ON ^</span>
<span class="go"> -DARROW_HDFS=ON ^</span>
<span class="go"> -DARROW_JSON=ON ^</span>
<span class="go"> -DARROW_PARQUET=ON ^</span>
<span class="go"> -DARROW_WITH_LZ4=ON ^</span>
<span class="go"> -DARROW_WITH_SNAPPY=ON ^</span>
<span class="go"> -DARROW_WITH_ZLIB=ON ^</span>
<span class="go"> -DARROW_WITH_ZSTD=ON ^</span>
<span class="go"> ..</span>
<span class="gp">$ </span>cmake<span class="w"> </span>--build<span class="w"> </span>.<span class="w"> </span>--target<span class="w"> </span>install<span class="w"> </span>--config<span class="w"> </span>Release
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
</div>
</div>
<section id="optional-build-components">
<h4>Optional build components<a class="headerlink" href="#optional-build-components" title="Link to this heading">#</a></h4>
<p>There are several optional components that can be enabled or disabled by setting
specific flags to <code class="docutils literal notranslate"><span class="pre">ON</span></code> or <code class="docutils literal notranslate"><span class="pre">OFF</span></code>, respectively. See the list of
<a class="reference internal" href="#python-dev-env-variables"><span class="std std-ref">Relevant components and environment variables</span></a> below.</p>
<p>You may choose between different kinds of C++ build types:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=Release</span></code> (the default) produces a build with optimizations
enabled and debugging information disabled;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=Debug</span></code> produces a build with optimizations
disabled and debugging information enabled;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">-DCMAKE_BUILD_TYPE=RelWithDebInfo</span></code> produces a build with both optimizations
and debugging information enabled.</p></li>
</ul>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="../cpp/building.html#cpp-building-building"><span class="std std-ref">Building Arrow C++</span></a>.</p>
<p>For any other C++ build challenges, see <a class="reference internal" href="../cpp/index.html#cpp-development"><span class="std std-ref">C++ Development</span></a>.</p>
</div>
<p>If you need to rebuild the C++ part because of errors in the process, it is
advisable to delete the build folder first; see <a class="reference internal" href="#stale-artifacts"><span class="std std-ref">Deleting stale build artifacts</span></a>.
If the build succeeded and you only need to rebuild after pulling the latest
changes from git main, this step is not needed.</p>
</section>
</section>
<section id="pyarrow-build-section">
<span id="id1"></span><h3>Build PyArrow<a class="headerlink" href="#pyarrow-build-section" title="Link to this heading">#</a></h3>
<p>If you did build one of the optional components in C++, the equivalent components
will be enabled by default for building pyarrow. This default can be overridden
by setting the corresponding <code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_$COMPONENT</span></code> environment variable
to 0 or 1, see <a class="reference internal" href="#python-dev-env-variables"><span class="std std-ref">Relevant components and environment variables</span></a> below.</p>
<p>To build PyArrow run:</p>
<div class="sd-tab-set docutils">
<input checked="checked" id="sd-tab-item-7" name="sd-tab-set-3" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="linux-macos" for="sd-tab-item-7">
Linux and macOS</label><div class="sd-tab-content docutils">
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow/python
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
</div>
<input id="sd-tab-item-8" name="sd-tab-set-3" type="radio">
<label class="sd-tab-label" data-sync-group="language" data-sync-id="wins" for="sd-tab-item-8">
Windows</label><div class="sd-tab-content docutils">
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">pushd</span><span class="w"> </span>arrow<span class="se">\p</span>ython
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
<span class="gp">$ </span><span class="nb">popd</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you are using Conda with Python 3.9 or earlier, you must
set <code class="docutils literal notranslate"><span class="pre">CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1</span></code>.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>With the above instructions the Arrow C++ libraries are not bundled with
the Python extension. This is recommended for development as it allows the
C++ libraries to be re-built separately.</p>
<p>If you are using the conda package manager then conda will ensure the Arrow C++
libraries are found. <strong>In case you are NOT using conda</strong> then you have to:</p>
<ul class="simple">
<li><p>add the path of installed DLL libraries to <code class="docutils literal notranslate"><span class="pre">PATH</span></code> every time before
importing <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>, or</p></li>
<li><p>bundle the Arrow C++ libraries with <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>.</p></li>
</ul>
<p><strong>Bundle Arrow C++ and PyArrow</strong></p>
<p>If you want to bundle the Arrow C++ libraries with <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>, set the
<code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_ARROW_CPP</span></code> environment variable before building <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">set</span><span class="w"> </span><span class="nv">PYARROW_BUNDLE_ARROW_CPP</span><span class="o">=</span><span class="m">1</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--inplace
</pre></div>
</div>
<p>Note that bundled Arrow C++ libraries will not be automatically
updated when rebuilding Arrow C++.</p>
</div>
</div>
</div>
<p>To set the number of threads used to compile PyArrow’s C++/Cython components,
set the <code class="docutils literal notranslate"><span class="pre">PYARROW_PARALLEL</span></code> environment variable.</p>
<p>If you build PyArrow but then make changes to the Arrow C++ or PyArrow code,
you can end up with stale build artifacts. This can lead to
unexpected behavior or errors. To avoid this, you can clean the build
artifacts before rebuilding. See <a class="reference internal" href="#stale-artifacts"><span class="std std-ref">Deleting stale build artifacts</span></a>.</p>
<p>By default, PyArrow will be built in release mode even if Arrow C++ has been
built in debug mode. To create a debug build of PyArrow, run
<code class="docutils literal notranslate"><span class="pre">export</span> <span class="pre">PYARROW_BUILD_TYPE=debug</span></code> prior to running <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">setup.py</span>
<span class="pre">build_ext</span> <span class="pre">--inplace</span></code> above. A <code class="docutils literal notranslate"><span class="pre">relwithdebinfo</span></code> build can be created
similarly.</p>
<section id="self-contained-wheel">
<h4>Self-Contained Wheel<a class="headerlink" href="#self-contained-wheel" title="Link to this heading">#</a></h4>
<p>If you’re preparing a PyArrow wheel for distribution (e.g., for PyPI), you’ll
need to build a self-contained wheel (including the Arrow and Parquet C++
libraries). This ensures that all necessary native libraries are bundled inside
the wheel, so users can install it without needing to have Arrow or Parquet
installed separately on their system.</p>
<p>To do this, pass the <code class="docutils literal notranslate"><span class="pre">--bundle-arrow-cpp</span></code> option to the build command:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>wheel<span class="w"> </span><span class="c1"># if not installed</span>
<span class="gp">$ </span>python<span class="w"> </span>setup.py<span class="w"> </span>build_ext<span class="w"> </span>--build-type<span class="o">=</span><span class="nv">$ARROW_BUILD_TYPE</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--bundle-arrow-cpp<span class="w"> </span>bdist_wheel
</pre></div>
</div>
<p>This option is typically only needed for releases or distribution scenarios,
not for local development.</p>
</section>
<section id="editable-install">
<h4>Editable install<a class="headerlink" href="#editable-install" title="Link to this heading">#</a></h4>
<p>To install an editable PyArrow build, run the following command from the
<code class="docutils literal notranslate"><span class="pre">arrow/python</span></code> directory:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">pip install -e . --no-build-isolation</span>
</pre></div>
</div>
<p>This creates an <em>editable install</em>, meaning changes to the Python source code
will be reflected immediately without needing to reinstall the package.
The <code class="docutils literal notranslate"><span class="pre">--no-build-isolation</span></code> flag ensures that the build uses your current
environment’s dependencies instead of creating an isolated one. This is
especially useful during development and debugging.</p>
</section>
</section>
<section id="deleting-stale-build-artifacts">
<span id="stale-artifacts"></span><h3>Deleting stale build artifacts<a class="headerlink" href="#deleting-stale-build-artifacts" title="Link to this heading">#</a></h3>
<p>When there have been changes to the structure of the Arrow C++ library or PyArrow,
a thorough cleaning is recommended as a first attempt at fixing build errors.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>It is not necessarily intuitive from the error itself that the problem is due to stale artifacts.
An example of a build error caused by stale artifacts is
<code class="docutils literal notranslate"><span class="pre">Unknown</span> <span class="pre">CMake</span> <span class="pre">command</span> <span class="pre">&quot;arrow_keep_backward_compatibility&quot;</span></code>.</p>
</div>
<p>To delete stale Arrow C++ build artifacts:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>rm<span class="w"> </span>-rf<span class="w"> </span>arrow/cpp/build
</pre></div>
</div>
<p>To delete stale PyArrow build artifacts:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clean<span class="w"> </span>-Xfd<span class="w"> </span>python
</pre></div>
</div>
<p>If using a Conda environment, there are some build artifacts that get installed in
<code class="docutils literal notranslate"><span class="pre">$ARROW_HOME</span></code> (aka <code class="docutils literal notranslate"><span class="pre">$CONDA_PREFIX</span></code>). For example, <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/lib/cmake/Arrow*</span></code>,
<code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/include/arrow</span></code>, <code class="docutils literal notranslate"><span class="pre">$ARROW_HOME/lib/libarrow*</span></code>, etc.</p>
<p>These files can be manually deleted. If unsure which files to erase, one approach
is to recreate the Conda environment.</p>
<p>Either delete the current one, and start fresh:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>conda<span class="w"> </span>deactivate
<span class="gp">$ </span>conda<span class="w"> </span>remove<span class="w"> </span>-n<span class="w"> </span>pyarrow-dev<span class="w"> </span>--all
</pre></div>
</div>
<p>Or, less destructively, create a different environment with a different name.</p>
</section>
<section id="docker-examples">
<h3>Docker examples<a class="headerlink" href="#docker-examples" title="Link to this heading">#</a></h3>
<p>If you are having difficulty building the Python library from source, take a
look at the <a class="reference external" href="https://github.com/apache/arrow/tree/main/python/examples/minimal_build">python/examples/minimal_build</a>
directory which illustrates a complete build and test from source both with
the conda- and pip-based build methods.</p>
</section>
</section>
<section id="test">
<h2>Test<a class="headerlink" href="#test" title="Link to this heading">#</a></h2>
<p>Now you are ready to install test dependencies and run <a class="reference internal" href="development.html#python-unit-testing"><span class="std std-ref">Unit Testing</span></a>, as
described in the development section.</p>
</section>
<section id="relevant-components-and-environment-variables">
<span id="python-dev-env-variables"></span><h2>Relevant components and environment variables<a class="headerlink" href="#relevant-components-and-environment-variables" title="Link to this heading">#</a></h2>
<p>List of relevant environment variables that can be used to build
PyArrow are:</p>
<div class="pst-scrollable-table-container"><table class="table">
<colgroup>
<col style="width: 33.3%" />
<col style="width: 33.3%" />
<col style="width: 33.3%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>PyArrow environment variable</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Default value</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUILD_TYPE</span></code></p></td>
<td><p>Build type for PyArrow (release, debug or relwithdebinfo), sets <code class="docutils literal notranslate"><span class="pre">CMAKE_BUILD_TYPE</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">release</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CMAKE_GENERATOR</span></code></p></td>
<td><p>Example: <code class="docutils literal notranslate"><span class="pre">'Visual</span> <span class="pre">Studio</span> <span class="pre">17</span> <span class="pre">2022</span> <span class="pre">Win64'</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CMAKE_OPTIONS</span></code></p></td>
<td><p>Extra CMake and Arrow options (ex. <code class="docutils literal notranslate"><span class="pre">&quot;-DARROW_SIMD_LEVEL=NONE</span> <span class="pre">-DCMAKE_OSX_ARCHITECTURES=x86_64;arm64&quot;</span></code>)</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_CXXFLAGS</span></code></p></td>
<td><p>Extra C++ compiler flags</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_GENERATE_COVERAGE</span></code></p></td>
<td><p>Setting <code class="docutils literal notranslate"><span class="pre">Xlinetrace</span></code> flag to true for the Cython compiler</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">false</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_ARROW_CPP</span></code></p></td>
<td><p>Bundle the Arrow C++ libraries</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUNDLE_CYTHON_CPP</span></code></p></td>
<td><p>Bundle the C++ files generated by Cython</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_BUILD_VERBOSE</span></code></p></td>
<td><p>Enable verbose output from Makefile builds</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">0</span></code> (<code class="docutils literal notranslate"><span class="pre">OFF</span></code>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_PARALLEL</span></code></p></td>
<td><p>Number of processes used to compile PyArrow’s C++/Cython components</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">''</span></code></p></td>
</tr>
</tbody>
</table>
</div>
<p>The components that are enabled or disabled when building PyArrow are by default
based on how Arrow C++ is built (i.e. they follow the <code class="docutils literal notranslate"><span class="pre">ARROW_$COMPONENT</span></code> flags).
However, the <code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_$COMPONENT</span></code> environment variables can still be used
to override this when building PyArrow (e.g. to disable components, or to enforce
certain components to be built):</p>
<div class="pst-scrollable-table-container"><table class="table">
<colgroup>
<col style="width: 50.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Arrow flags/options</p></th>
<th class="head"><p>Corresponding environment variables for PyArrow</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_GCS</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_GCS</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_S3</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_S3</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_AZURE</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_AZURE</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_HDFS</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_HDFS</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_CUDA</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_CUDA</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_SUBSTRAIT</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_SUBSTRAIT</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_FLIGHT</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_FLIGHT</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_ACERO</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_ACERO</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_DATASET</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_DATASET</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_PARQUET</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_PARQUET</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">PARQUET_REQUIRE_ENCRYPTION</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_PARQUET_ENCRYPTION</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_ORC</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_ORC</span></code></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW_GANDIVA</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">PYARROW_WITH_GANDIVA</span></code></p></td>
</tr>
</tbody>
</table>
</div>
</section>
<section id="installing-nightly-packages">
  <!-- Nightly wheels: unofficial builds hosted on anaconda.org for testing purposes. -->
  <h2>Installing Nightly Packages<a class="headerlink" href="#installing-nightly-packages" title="Link to this heading">#</a></h2>
  <div class="admonition warning">
    <p class="admonition-title">Warning</p>
    <p>These packages are not official releases. Use them at your own risk.</p>
  </div>
  <p>PyArrow has nightly wheels for testing purposes hosted at
    <a class="reference external" href="https://anaconda.org/scientific-python-nightly-wheels/pyarrow">scientific-python-nightly-wheels</a>.</p>
  <p>These may be suitable for downstream libraries in their continuous integration
    setup to maintain compatibility with the upcoming PyArrow features,
    deprecations, and/or feature removals.</p>
  <p>To install the most recent nightly version of PyArrow, run:</p>
  <!-- Whitespace inside <pre> is significant: the lines of the command stay flush left. -->
  <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>-i<span class="w"> </span>https://pypi.anaconda.org/scientific-python-nightly-wheels/simple<span class="w"> </span><span class="se">\</span>
<span class="w"> </span>pyarrow
</pre></div>
  </div>
</section>
</section>
</article>
<footer class="prev-next-footer d-print-none">
  <!-- Previous / next page links. The angle icons are purely decorative (the
       adjacent text already names each link), so they carry aria-hidden="true"
       to keep them out of the accessibility tree. -->
  <div class="prev-next-area">
    <a class="left-prev" href="index.html" title="previous page">
      <i class="fa-solid fa-angle-left" aria-hidden="true"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">Python Development</p>
      </div>
    </a>
    <a class="right-next" href="development.html" title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">Developing PyArrow</p>
      </div>
      <i class="fa-solid fa-angle-right" aria-hidden="true"></i>
    </a>
  </div>
</footer>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
  <!-- "On this page" table of contents. The heading element below is referenced
       by the nav's aria-labelledby, so the decorative list icon is marked
       aria-hidden="true" to keep it out of the nav's accessible name. -->
  <div
    id="pst-page-navigation-heading-2"
    class="page-toc tocsection onthispage">
    <i class="fa-solid fa-list" aria-hidden="true"></i> On this page
  </div>
  <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
    <!-- Nested lists mirror the heading levels of the page (toc-h2 / toc-h3 / toc-h4). -->
    <ul class="visible nav section-nav flex-column">
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#system-requirements">System Requirements</a></li>
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#environment-setup">Environment setup</a></li>
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#build">Build</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#build-c">Build C++</a><ul class="nav section-nav flex-column">
              <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#optional-build-components">Optional build components</a></li>
            </ul>
          </li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pyarrow-build-section">Build PyArrow</a><ul class="nav section-nav flex-column">
              <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#self-contained-wheel">Self-Contained Wheel</a></li>
              <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#editable-install">Editable install</a></li>
            </ul>
          </li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#deleting-stale-build-artifacts">Deleting stale build artifacts</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#docker-examples">Docker examples</a></li>
        </ul>
      </li>
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#test">Test</a></li>
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#relevant-components-and-environment-variables">Relevant components and environment variables</a></li>
      <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#installing-nightly-packages">Installing Nightly Packages</a></li>
    </ul>
  </nav></div>
<div class="sidebar-secondary-item">
  <!-- Link to edit this page's reStructuredText source on GitHub. The pencil
       icon is decorative (the link text already says what the link does), so it
       is hidden from assistive technology. -->
  <div class="tocsection editthispage">
    <a href="https://github.com/apache/arrow/edit/main/docs/source/developers/python/building.rst">
      <i class="fa-solid fa-pencil" aria-hidden="true"></i>
      Edit on GitHub
    </a>
  </div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Theme scripts: placed near the end of the body and marked `defer`, so they
     do not block HTML parsing and execute in document order once parsing has
     finished (bootstrap.js first, then pydata-sphinx-theme.js). Both URLs are
     also declared with <link rel="preload" as="script"> in the document head. -->
<script defer src="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
  <!-- Site-wide footer: copyright and Sphinx version at the start, theme version at the end. -->
  <div class="bd-footer__inner bd-page-width">
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          © Copyright 2016-2025 Apache Software Foundation.
          Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
          <br>
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
          <br>
        </p>
      </div>
    </div>
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          <!-- L10n note: the theme URL is passed as an argument because it does not need to be localized. -->
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.1.
        </p>
      </div>
    </div>
  </div>
</footer>
</body>
</html>