blob: 2ca3efdbd8d11a328b6a351d0f2a925bef73836e [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<!-- Document metadata: encoding, viewport, title, theme assets and relation links. -->
<meta charset="utf-8" />
<!-- The viewport is declared once; a repeated declaration is redundant. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Apache Arrow Python Cookbook &#8212; Apache Arrow Python Cookbook documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=49eeb2a1" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="icon" href="_static/favicon.ico"/>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Reading and Writing Data" href="io.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<!-- Matomo -->
<script>
  // Reuse the page-wide command queue when one already exists; otherwise start a new one.
  var _paq = window._paq = window._paq || [];
  /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
  /* Cookie tracking is explicitly disabled to avoid privacy issues */
  _paq.push(['disableCookies']);
  _paq.push(['trackPageView']);
  _paq.push(['enableLinkTracking']);
  (function() {
    var baseUrl = "https://analytics.apache.org/";
    _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
    _paq.push(['setSiteId', '20']);

    // Build an async script element for the tracker and insert it
    // immediately before the first script element in the document.
    var firstScript = document.getElementsByTagName('script')[0];
    var trackerScript = document.createElement('script');
    trackerScript.async = true;
    trackerScript.src = baseUrl + 'matomo.js';
    firstScript.parentNode.insertBefore(trackerScript, firstScript);
  })();
</script>
<!-- End Matomo Code -->
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="apache-arrow-python-cookbook">
<h1>Apache Arrow Python Cookbook<a class="headerlink" href="#apache-arrow-python-cookbook" title="Link to this heading"></a></h1>
<p>The Apache Arrow Cookbook is a collection of recipes that demonstrate
how to solve many common tasks that users might need to perform
when working with Arrow data. The examples in this cookbook will also
serve as robust and well-performing solutions to those tasks.</p>
<p>This cookbook is tested with pyarrow 16.0.0.dev273.</p>
<div class="toctree-wrapper compound">
  <!-- Table of contents: one top-level entry per chapter page, with links to that page's sections nested beneath it. -->
  <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
  <ul>
    <!-- io.html -->
    <li class="toctree-l1"><a class="reference internal" href="io.html">Reading and Writing Data</a><ul>
      <li class="toctree-l2"><a class="reference internal" href="io.html#write-a-parquet-file">Write a Parquet file</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-a-parquet-file">Reading a Parquet file</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-a-subset-of-parquet-data">Reading a subset of Parquet data</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#saving-arrow-arrays-to-disk">Saving Arrow Arrays to disk</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#memory-mapping-arrow-arrays-from-disk">Memory Mapping Arrow Arrays from disk</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#writing-csv-files">Writing CSV files</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#writing-csv-files-incrementally">Writing CSV files incrementally</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-csv-files">Reading CSV files</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#writing-partitioned-datasets">Writing Partitioned Datasets</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-partitioned-data">Reading Partitioned data</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-partitioned-data-from-s3">Reading Partitioned Data from S3</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#write-a-feather-file">Write a Feather file</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-a-feather-file">Reading a Feather file</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-line-delimited-json">Reading Line Delimited JSON</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#writing-compressed-data">Writing Compressed Data</a></li>
      <li class="toctree-l2"><a class="reference internal" href="io.html#reading-compressed-data">Reading Compressed Data</a></li>
    </ul>
    </li>
    <!-- create.html -->
    <li class="toctree-l1"><a class="reference internal" href="create.html">Creating Arrow Objects</a><ul>
      <li class="toctree-l2"><a class="reference internal" href="create.html#creating-arrays">Creating Arrays</a></li>
      <li class="toctree-l2"><a class="reference internal" href="create.html#creating-tables">Creating Tables</a></li>
      <li class="toctree-l2"><a class="reference internal" href="create.html#create-table-from-plain-types">Create Table from Plain Types</a></li>
      <li class="toctree-l2"><a class="reference internal" href="create.html#creating-record-batches">Creating Record Batches</a></li>
      <li class="toctree-l2"><a class="reference internal" href="create.html#store-categorical-data">Store Categorical Data</a></li>
    </ul>
    </li>
    <!-- schema.html -->
    <li class="toctree-l1"><a class="reference internal" href="schema.html">Working with Schema</a><ul>
      <li class="toctree-l2"><a class="reference internal" href="schema.html#setting-the-data-type-of-an-arrow-array">Setting the data type of an Arrow Array</a></li>
      <li class="toctree-l2"><a class="reference internal" href="schema.html#setting-the-schema-of-a-table">Setting the schema of a Table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="schema.html#merging-multiple-schemas">Merging multiple schemas</a></li>
    </ul>
    </li>
    <!-- data.html -->
    <li class="toctree-l1"><a class="reference internal" href="data.html">Data Manipulation</a><ul>
      <li class="toctree-l2"><a class="reference internal" href="data.html#computing-mean-min-max-values-of-an-array">Computing Mean/Min/Max values of an array</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#counting-occurrences-of-elements">Counting Occurrences of Elements</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#applying-arithmetic-functions-to-arrays">Applying arithmetic functions to arrays.</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#appending-tables-to-an-existing-table">Appending tables to an existing table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#adding-a-column-to-an-existing-table">Adding a column to an existing Table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#replacing-a-column-in-an-existing-table">Replacing a column in an existing Table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#group-a-table">Group a Table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#sort-a-table">Sort a Table</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#searching-for-values-matching-a-predicate-in-arrays">Searching for values matching a predicate in Arrays</a></li>
      <li class="toctree-l2"><a class="reference internal" href="data.html#filtering-arrays-using-a-mask">Filtering Arrays using a mask</a></li>
    </ul>
    </li>
    <!-- flight.html -->
    <li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight</a><ul>
      <li class="toctree-l2"><a class="reference internal" href="flight.html#simple-parquet-storage-service-with-arrow-flight">Simple Parquet storage service with Arrow Flight</a></li>
      <li class="toctree-l2"><a class="reference internal" href="flight.html#streaming-parquet-storage-service">Streaming Parquet Storage Service</a></li>
      <li class="toctree-l2"><a class="reference internal" href="flight.html#authentication-with-user-password">Authentication with user/password</a></li>
      <li class="toctree-l2"><a class="reference internal" href="flight.html#securing-connections-with-tls">Securing connections with TLS</a></li>
      <li class="toctree-l2"><a class="reference internal" href="flight.html#propagating-opentelemetry-traces">Propagating OpenTelemetry Traces</a></li>
    </ul>
    </li>
  </ul>
</div>
</section>
<section id="indices-and-tables">
  <!-- Links to the generated index, module index and search pages. -->
  <h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Link to this heading"></a></h1>
  <ul class="simple">
    <li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
    <li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
    <li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
  </ul>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<!-- Sidebar logo. The image is the link's only content, so its alt text
     serves as the link's accessible name and describes the destination. -->
<p class="logo">
  <a href="#">
    <img class="logo" src="_static/arrow-logo_vertical_black-txt_transparent-bg.svg" alt="Apache Arrow Python Cookbook home" />
  </a>
</p>
<p>
<!-- Third-party GitHub button widget for the apache/arrow-cookbook repository.
     The title gives the frame an accessible name; ampersands in the URL are
     written as &amp; so the query string is unambiguous in markup. -->
<iframe src="https://ghbtns.com/github-btn.html?user=apache&amp;repo=arrow-cookbook&amp;type=none&amp;count=true&amp;size=large&amp;v=2"
  title="GitHub button for apache/arrow-cookbook"
  allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
</p>
<h3>Navigation</h3>
<!-- Chapter links: the same top-level pages listed in the main table of contents. -->
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
  <li class="toctree-l1"><a class="reference internal" href="io.html">Reading and Writing Data</a></li>
  <li class="toctree-l1"><a class="reference internal" href="create.html">Creating Arrow Objects</a></li>
  <li class="toctree-l1"><a class="reference internal" href="schema.html">Working with Schema</a></li>
  <li class="toctree-l1"><a class="reference internal" href="data.html">Data Manipulation</a></li>
  <li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight</a></li>
</ul>
<hr />
<!-- External links to the PyArrow documentation on arrow.apache.org. -->
<ul>
  <li class="toctree-l1"><a href="https://arrow.apache.org/docs/python/index.html">User Guide</a></li>
  <li class="toctree-l1"><a href="https://arrow.apache.org/docs/python/api.html">API Reference</a></li>
</ul>
<div class="relations">
  <!-- Position of this page in the document sequence, with a link to the next chapter. -->
  <h3>Related Topics</h3>
  <ul>
    <li><a href="#">Documentation overview</a><ul>
      <li>Next: <a href="io.html" title="next chapter">Reading and Writing Data</a></li>
    </ul></li>
  </ul>
</div>
<!-- Quick-search form. It is hidden by the inline style and made visible by
     the script that follows, so it only appears when scripts run. -->
<div id="searchbox" style="display: none" role="search">
  <h3 id="searchlabel">Quick search</h3>
  <div class="searchformwrapper">
    <form class="search" action="search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
      <input type="submit" value="Go" />
    </form>
  </div>
</div>
<script>
  document.getElementById('searchbox').style.display = "block";
</script>
</div>
</div>
<div class="clearer"></div>
</div>
<!-- Page footer: copyright notice, generator credits and a link to the page's reStructuredText source. -->
<div class="footer">
  &#169;2022, Apache Software Foundation.
  |
  Powered by <a href="https://www.sphinx-doc.org/">Sphinx 7.2.6</a>
  &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
  |
  <a href="_sources/index.rst.txt" rel="nofollow">Page source</a>
</div>
</body>
</html>