blob: 383e43b591cccc2d872e14cd8ff86781de7783f0 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Python tutorial &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
// Apply the saved colour mode/theme to <html> before any stylesheet loads.
// Reading localStorage can throw (for example a SecurityError when the
// browser blocks site storage), so the reads are guarded and the same
// defaults the unguarded code used ("" and "light") are applied on failure.
(function () {
  var root = document.documentElement;
  var savedMode = "";
  var savedTheme = "light";
  try {
    savedMode = localStorage.getItem("mode") || "";
    savedTheme = localStorage.getItem("theme") || "light";
  } catch (err) {
    // Storage unavailable: keep the defaults assigned above.
  }
  root.dataset.mode = savedMode;
  root.dataset.theme = savedTheme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../../../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/sphinx_highlight.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script src="../../../_static/design-tabs.js"></script>
<script>
// Record this page's name on the shared DOCUMENTATION_OPTIONS object.
DOCUMENTATION_OPTIONS['pagename'] = 'developers/guide/tutorials/python_tutorial';
</script>
<script>
// Copy the theme-related settings onto DOCUMENTATION_OPTIONS, one plain
// property assignment per entry, in the listed order.
(function (options) {
  var themeSettings = [
    ['theme_version', '0.15.2'],
    ['theme_switcher_json_url', '/docs/_static/versions.json'],
    ['theme_switcher_version_match', 'dev/'],
    ['show_version_warning_banner', true]
  ];
  themeSettings.forEach(function (entry) {
    options[entry[0]] = entry[1];
  });
})(DOCUMENTATION_OPTIONS);
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/developers/guide/tutorials/python_tutorial.html" />
<link rel="icon" href="../../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="R tutorials" href="r_tutorial.html" />
<link rel="prev" title="Tutorials" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<!--
  Analytics bootstrap. The script below:
  1. reuses window._paq if it already exists, otherwise creates an empty
     command queue;
  2. queues disableCookies, trackPageView and enableLinkTracking, in that
     order;
  3. queues the tracker URL (analytics.apache.org + matomo.php) and site id '20';
  4. creates an async script element pointing at matomo.js and inserts it
     before the first script element in the document.
-->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../../index.html">
<img src="../../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<!-- The dark-mode logo (class only-dark) is written by script, so it is only present in the page when JavaScript runs. -->
<script>document.write(`<img src="../../../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../../index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Write the navbar search button. It is emitted from script, so it only
// exists when JavaScript runs. type="button" is stated explicitly so the
// control is never treated as a submit button, matching the other buttons
// on this page that already declare their type.
document.write(`
<button type="button" class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<!--
  Writes the navbar version switcher: a dropdown toggle button
  (pst-version-switcher-button-2) and its initially empty list container
  (pst-version-switcher-list-2). Per the inline notes in the markup, the
  button text and the list entries are filled in later by JavaScript.
-->
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Write the navbar light/dark/auto theme toggle. It is emitted from script,
// so it only exists when JavaScript runs. type="button" is stated explicitly
// so the control is never treated as a submit button, matching the other
// buttons on this page that already declare their type.
document.write(`
<button type="button" class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Write the search button for the mobile navbar container. It is emitted
// from script, so it only exists when JavaScript runs. type="button" is
// stated explicitly so the control is never treated as a submit button,
// matching the other buttons on this page that already declare their type.
document.write(`
<button type="button" class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../../index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<!--
  Writes the sidebar copy of the version switcher: a dropdown toggle button
  (pst-version-switcher-button-3) and its initially empty list container
  (pst-version-switcher-list-3). Per the inline notes in the markup, the
  button text and the list entries are filled in later by JavaScript.
-->
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Write the sidebar copy of the light/dark/auto theme toggle. It is emitted
// from script, so it only exists when JavaScript runs. type="button" is
// stated explicitly so the control is never treated as a submit button,
// matching the other buttons on this page that already declare their type.
document.write(`
<button type="button" class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../../bug_reports.html">Bug reports and feature requests</a></li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../index.html">New Contributor’s Guide</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../architectural_overview.html">Architectural Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../communication.html">Communication</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../step_by_step/index.html">Steps in making your first PR</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/set_up.html">Set up</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/building.html">Building the Arrow libraries 🏋🏿‍♀️</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/finding_issues.html">Finding good first issues 🔎</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/arrow_codebase.html">Working on the Arrow codebase 🧐</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/testing.html">Testing 🧪</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/styling.html">Styling 😎</a></li>
<li class="toctree-l3"><a class="reference internal" href="../step_by_step/pr_lifecycle.html">Lifecycle of a pull request</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../documentation.html">Helping with documentation</a></li>
<li class="toctree-l2 current active has-children"><a class="reference internal" href="index.html">Tutorials</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Python tutorial</a></li>
<li class="toctree-l3"><a class="reference internal" href="r_tutorial.html">R tutorials</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../resources.html">Additional information and resources</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../overview.html">Contributing Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reviewing.html">Reviewing contributions</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../cpp/index.html">C++ Development</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/building.html">Building Arrow C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/development.html">Development Guidelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/windows.html">Developing on Windows</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/emscripten.html">Cross compiling for WebAssembly with Emscripten</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../cpp/fuzzing.html">Fuzzing Arrow C++</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../java/index.html">Java Development</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../java/building.html">Building Arrow Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../java/development.html">Development Guidelines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../python.html">Python Development</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../continuous_integration/index.html">Continuous Integration</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../continuous_integration/overview.html">Continuous Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../continuous_integration/docker.html">Running Docker Builds</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../continuous_integration/archery.html">Daily Development using Archery</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../continuous_integration/crossbow.html">Packaging and Testing with Crossbow</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../documentation.html">Building the Documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../release.html">Release Management Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../release_verification.html">Release Verification Process</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Development</a></li>
<li class="breadcrumb-item"><i class="fa-solid fa-ellipsis"></i></li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Tutorials</a></li>
<li class="breadcrumb-item active" aria-current="page">Python tutorial</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="python-tutorial">
<span id="id1"></span><h1>Python tutorial<a class="headerlink" href="#python-tutorial" title="Permalink to this heading">#</a></h1>
<p>In this tutorial we will make an actual feature contribution to
Arrow, following the steps specified in the <a class="reference internal" href="../index.html#quick-ref-guide"><span class="std std-ref">Quick Reference</span></a>
section of the guide and the more detailed <a class="reference internal" href="../step_by_step/index.html#step-by-step"><span class="std std-ref">Steps in making your first PR</span></a>
section. Navigate there whenever you find that some information
is missing here.</p>
<p>The feature contribution will be added to the compute module
in PyArrow. But you can also follow the steps in case you are
correcting a bug or adding a binding.</p>
<p>This tutorial is different from the <a class="reference internal" href="../step_by_step/index.html#step-by-step"><span class="std std-ref">Steps in making your first PR</span></a> as we
will be working on a specific case. This tutorial is not meant
as a step-by-step guide.</p>
<p><strong>Let’s start!</strong></p>
<section id="set-up">
<h2>Set up<a class="headerlink" href="#set-up" title="Permalink to this heading">#</a></h2>
<p>Let’s set up the Arrow repository. We presume here that Git is
already installed. Otherwise please see the <a class="reference internal" href="../step_by_step/set_up.html#set-up"><span class="std std-ref">Set up</span></a> section.</p>
<p>Once the <a class="reference external" href="https://github.com/apache/arrow">Apache Arrow repository</a>
is forked we will clone it and add the link of the main repository
to our upstream.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/&lt;your<span class="w"> </span>username&gt;/arrow.git
<span class="gp">$ </span><span class="nb">cd</span><span class="w"> </span>arrow
<span class="gp">$ </span>git<span class="w"> </span>remote<span class="w"> </span>add<span class="w"> </span>upstream<span class="w"> </span>https://github.com/apache/arrow
</pre></div>
</div>
</section>
<section id="building-pyarrow">
<h2>Building PyArrow<a class="headerlink" href="#building-pyarrow" title="Permalink to this heading">#</a></h2>
<p>The script for building PyArrow differs depending on the operating
system you are using. For this reason we will only refer to
the instructions for the building process in this tutorial.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>For the <strong>introduction</strong> to the building process refer to the
<a class="reference internal" href="../step_by_step/building.html#build-arrow-guide"><span class="std std-ref">Building the Arrow libraries 🏋🏿‍♀️</span></a> section.</p>
<p>For the <strong>instructions</strong> on how to build PyArrow refer to the
<a class="reference internal" href="../../python.html#build-pyarrow"><span class="std std-ref">Building on Linux and macOS</span></a> section.</p>
</div>
</section>
<section id="create-a-github-issue-for-the-new-feature">
<h2>Create a GitHub issue for the new feature<a class="headerlink" href="#create-a-github-issue-for-the-new-feature" title="Permalink to this heading">#</a></h2>
<p>We will add a new feature that imitates the existing function
<code class="docutils literal notranslate"><span class="pre">min_max</span></code> from the <code class="docutils literal notranslate"><span class="pre">pyarrow.compute</span></code> module but makes the
interval bigger by 1 in both directions. Note that this is a
made-up function for the purpose of this guide.</p>
<p>See the example of the <code class="docutils literal notranslate"><span class="pre">pc.min_max</span></code> in
<a class="reference external" href="https://arrow.apache.org/cookbook/py/data.html#computing-mean-min-max-values-of-an-array">this link</a>.</p>
<p>First we need to create a GitHub issue as it doesn’t exist yet.
With a GitHub account created we will navigate to the
<a class="reference external" href="https://github.com/apache/arrow/issues">GitHub issue dashboard</a>
and click on the <strong>New issue</strong> button.</p>
<p>We should make sure to assign ourselves to the issue to let others
know we are working on it. You can do that by adding the comment
<code class="docutils literal notranslate"><span class="pre">take</span></code> to the issue created.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>To get more information on GitHub issues go to the
<a class="reference internal" href="../step_by_step/finding_issues.html#finding-issues"><span class="std std-ref">Finding good first issues 🔎</span></a> part of the guide.</p>
</div>
</section>
<section id="start-the-work-on-a-new-branch">
<h2>Start the work on a new branch<a class="headerlink" href="#start-the-work-on-a-new-branch" title="Permalink to this heading">#</a></h2>
<p>Before we start working on adding the feature we should
create a new branch from the updated main branch.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>checkout<span class="w"> </span>main
<span class="gp">$ </span>git<span class="w"> </span>fetch<span class="w"> </span>upstream
<span class="gp">$ </span>git<span class="w"> </span>pull<span class="w"> </span>--ff-only<span class="w"> </span>upstream<span class="w"> </span>main
<span class="gp">$ </span>git<span class="w"> </span>checkout<span class="w"> </span>-b<span class="w"> </span>ARROW-14977
</pre></div>
</div>
<p>Let’s research the Arrow library to see where the <code class="docutils literal notranslate"><span class="pre">pc.min_max</span></code>
function is defined and how it is connected to the C++ code, and get an idea
of where we could implement the new feature.</p>
<figure class="align-default" id="id2">
<a class="reference internal image-reference" href="../../../_images/python_tutorial_github_search.jpeg"><img alt="Apache Arrow GitHub repository dashboard where we are searching for a pc.min_max function reference." src="../../../_images/python_tutorial_github_search.jpeg" style="width: 698.0px; height: 274.0px;" /></a>
<figcaption>
<p><span class="caption-text">We could try to search for the function reference in the
Apache Arrow GitHub repository.</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
</figcaption>
</figure>
<figure class="align-default" id="id3">
<a class="reference internal image-reference" href="../../../_images/python_tutorial_github_find_in_file.jpeg"><img alt="In the GitHub repository we are searching through the test_compute.py file for the pc.min_max function." src="../../../_images/python_tutorial_github_find_in_file.jpeg" style="width: 612.0px; height: 244.5px;" /></a>
<figcaption>
<p><span class="caption-text">And search through the <code class="docutils literal notranslate"><span class="pre">test_compute.py</span></code> file in the <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>
folder.</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
</figcaption>
</figure>
<p>From the search we can see that the function is tested in the
<code class="docutils literal notranslate"><span class="pre">python/pyarrow/tests/test_compute.py</span></code> file, which would mean the
function is defined in the <code class="docutils literal notranslate"><span class="pre">compute.py</span></code> file.</p>
<p>After examining the <code class="docutils literal notranslate"><span class="pre">compute.py</span></code> file we can see that together
with <code class="docutils literal notranslate"><span class="pre">_compute.pyx</span></code> the functions from C++ get wrapped into Python.
We will define the new feature at the end of the <code class="docutils literal notranslate"><span class="pre">compute.py</span></code> file.</p>
<p>Let’s run some code in the Python console from the <code class="docutils literal notranslate"><span class="pre">arrow/python</span></code>
directory in order to learn more about <code class="docutils literal notranslate"><span class="pre">pc.min_max</span></code>.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">cd</span><span class="w"> </span>python
<span class="gp">$ </span>python
<span class="go">Python 3.9.7 (default, Oct 22 2021, 13:24:00)</span>
<span class="go">[Clang 13.0.0 (clang-1300.0.29.3)] on darwin</span>
<span class="go">Type &quot;help&quot;, &quot;copyright&quot;, &quot;credits&quot; or &quot;license&quot; for more information.</span>
</pre></div>
</div>
<p>We have entered the Python console from the shell and we can
do some research:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">pyarrow.compute</span> <span class="k">as</span> <span class="nn">pc</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span>
<span class="go">[4, 5, 6, None, 1]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pc</span><span class="o">.</span><span class="n">min_max</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="go">&lt;pyarrow.StructScalar: [(&#39;min&#39;, 1), (&#39;max&#39;, 6)]&gt;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pc</span><span class="o">.</span><span class="n">min_max</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">skip_nulls</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">&lt;pyarrow.StructScalar: [(&#39;min&#39;, None), (&#39;max&#39;, None)]&gt;</span>
</pre></div>
</div>
<p>We will call our new feature <code class="docutils literal notranslate"><span class="pre">pc.tutorial_min_max</span></code>. We want the
result from our function, that takes the same input data, to be
<code class="docutils literal notranslate"><span class="pre">[('min-',</span> <span class="pre">0),</span> <span class="pre">('max+',</span> <span class="pre">7)]</span></code>. If we specify that the null value should be
included, the result should be equal to <code class="docutils literal notranslate"><span class="pre">pc.min_max</span></code> that is
<code class="docutils literal notranslate"><span class="pre">[('min',</span> <span class="pre">None),</span> <span class="pre">('max',</span> <span class="pre">None)]</span></code>.</p>
<p>Let’s add the first trial code into <code class="docutils literal notranslate"><span class="pre">arrow/python/pyarrow/compute.py</span></code>
where we first test the call to the “min_max” function from C++:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">tutorial_min_max</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">skip_nulls</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Add docstrings</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> values : Array</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> result : TODO</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; import pyarrow.compute as pc</span>
<span class="sd"> &gt;&gt;&gt; data = [4, 5, 6, None, 1]</span>
<span class="sd"> &gt;&gt;&gt; pc.tutorial_min_max(data)</span>
<span class="sd"> &lt;pyarrow.StructScalar: [(&#39;min-&#39;, 0), (&#39;max+&#39;, 7)]&gt;</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">options</span> <span class="o">=</span> <span class="n">ScalarAggregateOptions</span><span class="p">(</span><span class="n">skip_nulls</span><span class="o">=</span><span class="n">skip_nulls</span><span class="p">)</span>
<span class="k">return</span> <span class="n">call_function</span><span class="p">(</span><span class="s2">&quot;min_max&quot;</span><span class="p">,</span> <span class="p">[</span><span class="n">values</span><span class="p">],</span> <span class="n">options</span><span class="p">)</span>
</pre></div>
</div>
<p>To see if this works we will need to import <code class="docutils literal notranslate"><span class="pre">pyarrow.compute</span></code>
again and try:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">pyarrow.compute</span> <span class="k">as</span> <span class="nn">pc</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pc</span><span class="o">.</span><span class="n">tutorial_min_max</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="go">&lt;pyarrow.StructScalar: [(&#39;min&#39;, 1), (&#39;max&#39;, 6)]&gt;</span>
</pre></div>
</div>
<p>It’s working. Now we must correct the limits to get the corrected
interval. To do that we have to do some research on <code class="docutils literal notranslate"><span class="pre">pyarrow.StructScalar</span></code>.
In <a class="reference external" href="https://github.com/apache/arrow/blob/994074d2e7ff073301e0959dbc5bb595a1e2a41b/python/pyarrow/tests/test_scalars.py#L547-L553">test_scalars.py</a>
under the <code class="docutils literal notranslate"><span class="pre">test_struct_duplicate_fields</span></code> we can see an example
of how the <code class="docutils literal notranslate"><span class="pre">StructScalar</span></code> is created. We could again run the
Python console and try creating one ourselves.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="nn">pa</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ty</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">struct</span><span class="p">([</span>
<span class="gp">... </span> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;min-&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span>
<span class="gp">... </span> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;max+&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span>
<span class="gp">... </span> <span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pa</span><span class="o">.</span><span class="n">scalar</span><span class="p">([(</span><span class="s1">&#39;min-&#39;</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;max+&#39;</span><span class="p">,</span> <span class="mi">9</span><span class="p">)],</span> <span class="nb">type</span><span class="o">=</span><span class="n">ty</span><span class="p">)</span>
<span class="go">&lt;pyarrow.StructScalar: [(&#39;min-&#39;, 3), (&#39;max+&#39;, 9)]&gt;</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>In cases where we don’t yet have good documentation, unit tests
can be a good place to look for code examples.</p>
</div>
<p>With the newly gained knowledge about <code class="docutils literal notranslate"><span class="pre">StructScalar</span></code> and additional
options for the <code class="docutils literal notranslate"><span class="pre">pc.min_max</span></code> function we can finish the work.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">tutorial_min_max</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">skip_nulls</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the minimum-1 and maximum+1 values of a numeric array.</span>
<span class="sd"> This is a made-up feature for the tutorial purposes.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> values : Array</span>
<span class="sd"> skip_nulls : bool, default True</span>
<span class="sd"> If True, ignore nulls in the input.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> result : StructScalar of min-1 and max+1</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; import pyarrow.compute as pc</span>
<span class="sd"> &gt;&gt;&gt; data = [4, 5, 6, None, 1]</span>
<span class="sd"> &gt;&gt;&gt; pc.tutorial_min_max(data)</span>
<span class="sd"> &lt;pyarrow.StructScalar: [(&#39;min-&#39;, 0), (&#39;max+&#39;, 7)]&gt;</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">options</span> <span class="o">=</span> <span class="n">ScalarAggregateOptions</span><span class="p">(</span><span class="n">skip_nulls</span><span class="o">=</span><span class="n">skip_nulls</span><span class="p">)</span>
<span class="n">min_max</span> <span class="o">=</span> <span class="n">call_function</span><span class="p">(</span><span class="s2">&quot;min_max&quot;</span><span class="p">,</span> <span class="p">[</span><span class="n">values</span><span class="p">],</span> <span class="n">options</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_max</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">min_t</span> <span class="o">=</span> <span class="n">min_max</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span><span class="o">-</span><span class="mi">1</span>
<span class="n">max_t</span> <span class="o">=</span> <span class="n">min_max</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span><span class="o">+</span><span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">min_t</span> <span class="o">=</span> <span class="n">min_max</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span>
<span class="n">max_t</span> <span class="o">=</span> <span class="n">min_max</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span>
<span class="n">ty</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">struct</span><span class="p">([</span>
<span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;min-&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span>
<span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s1">&#39;max+&#39;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()),</span>
<span class="p">])</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">scalar</span><span class="p">([(</span><span class="s1">&#39;min-&#39;</span><span class="p">,</span> <span class="n">min_t</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;max+&#39;</span><span class="p">,</span> <span class="n">max_t</span><span class="p">)],</span> <span class="nb">type</span><span class="o">=</span><span class="n">ty</span><span class="p">)</span>
</pre></div>
</div>
</section>
<section id="adding-a-test">
<h2>Adding a test<a class="headerlink" href="#adding-a-test" title="Permalink to this heading">#</a></h2>
<p>Now we should add a unit test to <code class="docutils literal notranslate"><span class="pre">python/pyarrow/tests/test_compute.py</span></code>
and run the pytest.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">test_tutorial_min_max</span><span class="p">():</span>
<span class="n">arr</span> <span class="o">=</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="n">l1</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;min-&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;max+&#39;</span><span class="p">:</span> <span class="mi">7</span><span class="p">}</span>
<span class="n">l2</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;min-&#39;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;max+&#39;</span><span class="p">:</span> <span class="kc">None</span><span class="p">}</span>
<span class="k">assert</span> <span class="n">pc</span><span class="o">.</span><span class="n">tutorial_min_max</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="o">==</span> <span class="n">l1</span>
<span class="k">assert</span> <span class="n">pc</span><span class="o">.</span><span class="n">tutorial_min_max</span><span class="p">(</span><span class="n">arr</span><span class="p">,</span>
<span class="n">skip_nulls</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="o">==</span> <span class="n">l2</span>
</pre></div>
</div>
<p>With the unit test added we can run the pytest from the shell. To run
a specific unit test, pass in the test name to the <code class="docutils literal notranslate"><span class="pre">-k</span></code> parameter.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nb">cd</span><span class="w"> </span>python
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow/tests/test_compute.py<span class="w"> </span>-k<span class="w"> </span>test_tutorial_min_max
<span class="go">======================== test session starts ==========================</span>
<span class="go">platform darwin -- Python 3.9.7, pytest-6.2.5, py-1.10.0, pluggy-1.0.0</span>
<span class="go">rootdir: /Users/alenkafrim/repos/arrow/python, configfile: setup.cfg</span>
<span class="go">plugins: hypothesis-6.24.1, lazy-fixture-0.6.3</span>
<span class="go">collected 204 items / 203 deselected / 1 selected</span>
<span class="go">pyarrow/tests/test_compute.py . [100%]</span>
<span class="go">======================== 1 passed, 203 deselected in 0.16s ============</span>
<span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>pyarrow/tests/test_compute.py
<span class="go">======================== test session starts ===========================</span>
<span class="go">platform darwin -- Python 3.9.7, pytest-6.2.5, py-1.10.0, pluggy-1.0.0</span>
<span class="go">rootdir: /Users/alenkafrim/repos/arrow/python, configfile: setup.cfg</span>
<span class="go">plugins: hypothesis-6.24.1, lazy-fixture-0.6.3</span>
<span class="go">collected 204 items</span>
<span class="go">pyarrow/tests/test_compute.py ................................... [ 46%]</span>
<span class="go">................................................. [100%]</span>
<span class="go">========================= 204 passed in 0.49s ==========================</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>For more information about testing see <a class="reference internal" href="../step_by_step/testing.html#testing"><span class="std std-ref">Testing 🧪</span></a> section.</p>
</div>
</section>
<section id="check-styling">
<h2>Check styling<a class="headerlink" href="#check-styling" title="Permalink to this heading">#</a></h2>
<p>At the end we also need to check the styling. In Arrow we use a
utility called <a class="reference external" href="https://arrow.apache.org/docs/developers/archery.html">Archery</a>
to check if the code is in line with the PEP 8 style guide.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>lint<span class="w"> </span>--python<span class="w"> </span>--fix
<span class="go">INFO:archery:Running Python formatter (autopep8)</span>
<span class="go">INFO:archery:Running Python linter (flake8)</span>
<span class="go">/Users/alenkafrim/repos/arrow/python/pyarrow/tests/test_compute.py:2288:80: E501 line too long (88 &gt; 79 characters)</span>
</pre></div>
</div>
<p>With the <code class="docutils literal notranslate"><span class="pre">--fix</span></code> option Archery will attempt to fix style issues,
but some issues like line length can’t be fixed automatically.
We should make the necessary corrections ourselves and run
Archery again.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>lint<span class="w"> </span>--python<span class="w"> </span>--fix
<span class="go">INFO:archery:Running Python formatter (autopep8)</span>
<span class="go">INFO:archery:Running Python linter (flake8)</span>
</pre></div>
</div>
<p>Done. Now let’s make the Pull Request!</p>
</section>
<section id="creating-a-pull-request">
<h2>Creating a Pull Request<a class="headerlink" href="#creating-a-pull-request" title="Permalink to this heading">#</a></h2>
<p>First let’s review our changes in the shell using
<code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">status</span></code> to see which files have been changed and to
commit only the ones we are working on.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>status
<span class="go">On branch ARROW-14977</span>
<span class="go">Changes not staged for commit:</span>
<span class="go"> (use &quot;git add &lt;file&gt;...&quot; to update what will be committed)</span>
<span class="go"> (use &quot;git restore &lt;file&gt;...&quot; to discard changes in working directory)</span>
<span class="go"> modified: python/pyarrow/compute.py</span>
<span class="go"> modified: python/pyarrow/tests/test_compute.py</span>
<span class="go">no changes added to commit (use &quot;git add&quot; and/or &quot;git commit -a&quot;)</span>
</pre></div>
</div>
<p>And <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">diff</span></code> to see the changes in the files
in order to spot any error we might have made.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>diff
<span class="go">diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py</span>
<span class="go">index 9dac606c3..e8fc775d8 100644</span>
<span class="go">--- a/python/pyarrow/compute.py</span>
<span class="go">+++ b/python/pyarrow/compute.py</span>
<span class="go">@@ -774,3 +774,45 @@ def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):</span>
<span class="go"> sort_keys = map(lambda key_name: (key_name, &quot;ascending&quot;), sort_keys)</span>
<span class="go"> options = SelectKOptions(k, sort_keys)</span>
<span class="go"> return call_function(&quot;select_k_unstable&quot;, [values], options, memory_pool)</span>
<span class="go">+</span>
<span class="go">+</span>
<span class="go">+def tutorial_min_max(values, skip_nulls=True):</span>
<span class="go">+ &quot;&quot;&quot;</span>
<span class="go">+ Compute the minimum-1 and maximum+1 values of a numeric array.</span>
<span class="go">+</span>
<span class="go">+ This is a made-up feature for the tutorial purposes.</span>
<span class="go">+</span>
<span class="go">+ Parameters</span>
<span class="go">+ ----------</span>
<span class="go">+ values : Array</span>
<span class="go">+ skip_nulls : bool, default True</span>
<span class="go">+ If True, ignore nulls in the input.</span>
<span class="go">+</span>
<span class="go">+ Returns</span>
<span class="go">+ -------</span>
<span class="go">+ result : StructScalar of min-1 and max+1</span>
<span class="go">+</span>
<span class="go">+ Examples</span>
<span class="go">+ --------</span>
<span class="go">+ &gt;&gt;&gt; import pyarrow.compute as pc</span>
<span class="go">+ &gt;&gt;&gt; data = [4, 5, 6, None, 1]</span>
<span class="go">+ &gt;&gt;&gt; pc.tutorial_min_max(data)</span>
<span class="go">+ &lt;pyarrow.StructScalar: [(&#39;min-&#39;, 0), (&#39;max+&#39;, 7)]&gt;</span>
<span class="go">+ &quot;&quot;&quot;</span>
<span class="go">+</span>
<span class="go">+ options = ScalarAggregateOptions(skip_nulls=skip_nulls)</span>
<span class="go">+ min_max = call_function(&quot;min_max&quot;, [values], options)</span>
<span class="go">+</span>
<span class="go">...</span>
</pre></div>
</div>
<p>Everything looks OK. Now we can make the commit (save our changes
to the branch history):</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>commit<span class="w"> </span>-am<span class="w"> </span><span class="s2">&quot;Adding a new compute feature for tutorial purposes&quot;</span>
<span class="go">[ARROW-14977 170ef85be] Adding a new compute feature for tutorial purposes</span>
<span class="go"> 2 files changed, 51 insertions(+)</span>
</pre></div>
</div>
<p>We can use <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">log</span></code> to check the history of commits:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>log
<span class="go">commit 170ef85beb8ee629be651e3f93bcc4a69e29cfb8 (HEAD -&gt; ARROW-14977)</span>
<span class="go">Author: Alenka Frim &lt;frim.alenka@gmail.com&gt;</span>
<span class="go">Date: Tue Dec 7 13:45:06 2021 +0100</span>
<span class="go"> Adding a new compute feature for tutorial purposes</span>
<span class="go">commit 8cebc4948ab5c5792c20a3f463e2043e01c49828 (main)</span>
<span class="go">Author: Sutou Kouhei &lt;kou@clear-code.com&gt;</span>
<span class="go">Date: Sun Dec 5 15:19:46 2021 +0900</span>
<span class="go"> ARROW-14981: [CI][Docs] Upload built documents</span>
<span class="go"> We can use this in release process instead of building on release</span>
<span class="go"> manager&#39;s local environment.</span>
<span class="go"> Closes #11856 from kou/ci-docs-upload</span>
<span class="go"> Authored-by: Sutou Kouhei &lt;kou@clear-code.com&gt;</span>
<span class="go"> Signed-off-by: Sutou Kouhei &lt;kou@clear-code.com&gt;</span>
<span class="go">...</span>
</pre></div>
</div>
<p>If we started the branch some time ago, we may need to rebase to
upstream main to make sure there are no merge conflicts:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>pull<span class="w"> </span>upstream<span class="w"> </span>main<span class="w"> </span>--rebase
</pre></div>
</div>
<p>And now we can push our work to the forked Arrow repository on GitHub
called <code class="docutils literal notranslate"><span class="pre">origin</span></code>.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>git<span class="w"> </span>push<span class="w"> </span>origin<span class="w"> </span>ARROW-14977
<span class="go">Enumerating objects: 13, done.</span>
<span class="go">Counting objects: 100% (13/13), done.</span>
<span class="go">Delta compression using up to 8 threads</span>
<span class="go">Compressing objects: 100% (7/7), done.</span>
<span class="go">Writing objects: 100% (7/7), 1.19 KiB | 1.19 MiB/s, done.</span>
<span class="go">Total 7 (delta 6), reused 0 (delta 0), pack-reused 0</span>
<span class="go">remote: Resolving deltas: 100% (6/6), completed with 6 local objects.</span>
<span class="go">remote:</span>
<span class="go">remote: Create a pull request for &#39;ARROW-14977&#39; on GitHub by visiting:</span>
<span class="go">remote: https://github.com/AlenkaF/arrow/pull/new/ARROW-14977</span>
<span class="go">remote:</span>
<span class="go">To https://github.com/AlenkaF/arrow.git</span>
<span class="go"> * [new branch] ARROW-14977 -&gt; ARROW-14977</span>
</pre></div>
</div>
<p>Now we have to go to the <a class="reference external" href="https://github.com/apache/arrow">Arrow repository on GitHub</a>
to create a Pull Request. On the GitHub Arrow
page (main or forked) we will see a yellow notice
bar with a note that we made recent pushes to the branch
ARROW-14977. That’s great, now we can make the Pull Request
by clicking on <strong>Compare &amp; pull request</strong>.</p>
<figure class="align-default" id="id4">
<a class="reference internal image-reference" href="../../../_images/python_tutorial_github_pr_notice.jpeg"><img alt="GitHub page of the Apache Arrow repository showing a notice bar indicating change has been made in our branch and a Pull Request can be created." src="../../../_images/python_tutorial_github_pr_notice.jpeg" style="width: 616.0px; height: 258.0px;" /></a>
<figcaption>
<p><span class="caption-text">Notice bar on the Apache Arrow repository.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
</figcaption>
</figure>
<p>First we need to change the Title to <em>ARROW-14977: [Python] Add a “made-up”
feature for the guide tutorial</em> in order to match it
with the issue. Note a punctuation mark was added!</p>
<p><em>Extra note: when this tutorial was created, we had been using the Jira issue
tracker. As we are currently using GitHub issues, the title would be prefixed
with GH-14977: [Python] Add a “made-up” feature for the guide tutorial</em>.</p>
<p>We will also add a description to make it clear to others what we are
trying to do.</p>
<p>Once we click <strong>Create pull request</strong> our code can be reviewed as a
Pull Request in the Apache Arrow repository.</p>
<figure class="align-default" id="id5">
<a class="reference internal image-reference" href="../../../_images/python_tutorial_pr.jpeg"><img alt="GitHub page of the Pull Request showing the title and a description." src="../../../_images/python_tutorial_pr.jpeg" style="width: 567.5px; height: 355.5px;" /></a>
<figcaption>
<p><span class="caption-text">Here it is, our Pull Request!</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
</figcaption>
</figure>
<p>The Pull Request gets connected to the issue and the CI is
running. After some time passes and we get a review we can correct
the code, comment, resolve conversations and so on. The Pull Request
we made can be viewed <a class="reference external" href="https://github.com/apache/arrow/pull/11900">here</a>.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>For more information about Pull Request workflow see <a class="reference internal" href="../step_by_step/pr_lifecycle.html#pr-lifecycle"><span class="std std-ref">Lifecycle of a pull request</span></a>.</p>
</div>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="index.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Tutorials</p>
</div>
</a>
<a class="right-next"
href="r_tutorial.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">R tutorials</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#set-up">Set up</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#building-pyarrow">Building PyArrow</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#create-a-github-issue-for-the-new-feature">Create a GitHub issue for the new feature</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#start-the-work-on-a-new-branch">Start the work on a new branch</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adding-a-test">Adding a test</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#check-styling">Check styling</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#creating-a-pull-request">Creating a Pull Request</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/developers/guide/tutorials/python_tutorial.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>