blob: a23209dc7a96a1dc2f2f88e0ede7b2cc2992b203 [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>pyarrow.dataset.DirectoryPartitioning &mdash; Apache Arrow v2.0.0</title>
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/language_data.js"></script>
<script type="text/javascript" src="../../_static/js/theme.js"></script>
<link rel="canonical" href="https://arrow.apache.org/docs/python/generated/pyarrow.dataset.DirectoryPartitioning.html" />
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="pyarrow.dataset.HivePartitioning" href="pyarrow.dataset.HivePartitioning.html" />
<link rel="prev" title="pyarrow.dataset.PartitioningFactory" href="pyarrow.dataset.PartitioningFactory.html" />
<!-- Matomo -->
<script>
// Matomo analytics bootstrap: tracker commands are queued on the global
// `_paq` array (an existing queue is reused if one is already present).
var _paq = window._paq = window._paq || [];
/* configuration commands such as "setCustomDimension" have to be queued ahead of "trackPageView" */
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var matomoBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', matomoBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Load matomo.js asynchronously by inserting a new <script> element
  // ahead of the first <script> already present in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = matomoBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<!-- `alt` is not a valid attribute on <a>; `title` keeps the hint, and the visible text remains the link's accessible name. -->
<a href="../../index.html" class="icon icon-home" title="Documentation Home"> Apache Arrow
</a>
<div class="version">
2.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<!-- A placeholder is not a reliable label, so the query field gets an explicit accessible name. -->
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Specifications and Protocols</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../format/Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Integration.html">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/CDataInterface.html">The Arrow C data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Other.html">Other Data Structures</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../status.html">Implementation Status</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/c_glib/">C/GLib</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../cpp/index.html">C++</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">C#</a></li>
<li class="toctree-l1"><a class="reference external" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../java/index.html">Java</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/js/">JavaScript</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">MATLAB</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">Python</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../install.html">Installing PyArrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../memory.html">Memory and IO Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data.html">Data Types and In-Memory Data Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ipc.html">Streaming, Serialization, and IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../filesystems.html">Filesystem Interface</a></li>
<li class="toctree-l2"><a class="reference internal" href="../filesystems_deprecated.html">Filesystem Interface (legacy)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l2"><a class="reference internal" href="../numpy.html">NumPy Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pandas.html">Pandas Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../timestamps.html">Timestamps</a></li>
<li class="toctree-l2"><a class="reference internal" href="../csv.html">Reading CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../feather.html">Feather File Format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cuda.html">CUDA Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extending_types.html">Extending pyarrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extending.html">Using pyarrow from C++ and Cython Code</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="../api.html">API Reference</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../api/datatypes.html">Data Types and Schemas</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/arrays.html">Arrays and Scalars</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/memory.html">Buffers and Memory</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/compute.html">Compute Functions</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/files.html">Streams and File Access</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/tables.html">Tables and Tensors</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/ipc.html">Serialization and IPC</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/flight.html">Arrow Flight</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/formats.html">Tabular File Formats</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/filesystems.html">Filesystems</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../api/dataset.html">Dataset</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../api/dataset.html#factory-functions">Factory functions</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="../api/dataset.html#classes">Classes</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/plasma.html">Plasma In-Memory Object Store</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/cuda.html">CUDA Integration</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/misc.html">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../getting_involved.html">Getting Involved</a></li>
<li class="toctree-l2"><a class="reference internal" href="../benchmarks.html">Benchmarks</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/r/">R</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">Ruby</a></li>
<li class="toctree-l1"><a class="reference external" href="https://docs.rs/crate/arrow/">Rust</a></li>
</ul>
<p class="caption"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../developers/contributing.html">Contributing to Apache Arrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/cpp/index.html">C++ Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/python.html">Python Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/archery.html">Daily Development using Archery</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/crossbow.html">Packaging and Testing with Crossbow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/docker.html">Running Docker Builds</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/documentation.html">Building the Documentation</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">Apache Arrow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<!-- Icon-only link: aria-label supplies the accessible name that the empty anchor content cannot. -->
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a> &raquo;</li>
<li><a href="../index.html">Python bindings</a> &raquo;</li>
<li><a href="../api.html">API Reference</a> &raquo;</li>
<li><a href="../api/dataset.html">Dataset</a> &raquo;</li>
<li>pyarrow.dataset.DirectoryPartitioning</li>
<li class="wy-breadcrumbs-aside">
<a href="../../_sources/python/generated/pyarrow.dataset.DirectoryPartitioning.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="pyarrow-dataset-directorypartitioning">
<h1>pyarrow.dataset.DirectoryPartitioning<a class="headerlink" href="#pyarrow-dataset-directorypartitioning" title="Permalink to this headline"></a></h1>
<dl class="py class">
<dt id="pyarrow.dataset.DirectoryPartitioning">
<em class="property">class </em><code class="sig-prename descclassname">pyarrow.dataset.</code><code class="sig-name descname">DirectoryPartitioning</code><a class="headerlink" href="#pyarrow.dataset.DirectoryPartitioning" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow._dataset.Partitioning</span></code></p>
<p>A Partitioning based on a specified Schema.</p>
<p>The DirectoryPartitioning expects one segment in the file path for each
field in the schema (all fields are required to be present).
For example, given schema&lt;year:int16, month:int8&gt;, the path “/2009/11” would
be parsed to (year == 2009 and month == 11).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>schema</strong> (<a class="reference internal" href="pyarrow.Schema.html#pyarrow.Schema" title="pyarrow.Schema"><em>Schema</em></a>) – The schema that describes the partitions present in the file path.</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>DirectoryPartitioning</em></p>
</dd>
</dl>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pyarrow.dataset</span> <span class="k">import</span> <span class="n">DirectoryPartitioning</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">DirectoryPartitioning</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([(</span><span class="s2">&quot;year&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">()),</span> <span class="p">(</span><span class="s2">&quot;month&quot;</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">())]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="n">partitioning</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="s2">&quot;/2009/11&quot;</span><span class="p">))</span>
<span class="go">((year == 2009:int16) and (month == 11:int8))</span>
</pre></div>
</div>
<dl class="py method">
<dt id="pyarrow.dataset.DirectoryPartitioning.__init__">
<code class="sig-name descname">__init__</code><span class="sig-paren">(</span><em class="sig-param"><span class="o">*</span><span class="n">args</span></em>, <em class="sig-param"><span class="o">**</span><span class="n">kwargs</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.dataset.DirectoryPartitioning.__init__" title="Permalink to this definition"></a></dt>
<dd><p>Initialize self. See help(type(self)) for accurate signature.</p>
</dd></dl>
<p class="rubric">Methods</p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.dataset.DirectoryPartitioning.__init__" title="pyarrow.dataset.DirectoryPartitioning.__init__"><code class="xref py py-obj docutils literal notranslate"><span class="pre">__init__</span></code></a>(*args, **kwargs)</p></td>
<td><p>Initialize self.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.dataset.DirectoryPartitioning.discover" title="pyarrow.dataset.DirectoryPartitioning.discover"><code class="xref py py-obj docutils literal notranslate"><span class="pre">discover</span></code></a></p></td>
<td><p>Discover a DirectoryPartitioning.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.dataset.DirectoryPartitioning.parse" title="pyarrow.dataset.DirectoryPartitioning.parse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">parse</span></code></a></p></td>
<td><p>Parse a path into a partition expression.</p></td>
</tr>
</tbody>
</table>
<p class="rubric">Attributes</p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.dataset.DirectoryPartitioning.schema" title="pyarrow.dataset.DirectoryPartitioning.schema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">schema</span></code></a></p></td>
<td><p>The arrow Schema attached to the partitioning.</p></td>
</tr>
</tbody>
</table>
<dl class="py method">
<dt id="pyarrow.dataset.DirectoryPartitioning.discover">
<em class="property">static </em><code class="sig-name descname">discover</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.dataset.DirectoryPartitioning.discover" title="Permalink to this definition"></a></dt>
<dd><p>Discover a DirectoryPartitioning.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>field_names</strong> (<em>list of str</em>) – The names to associate with the values from the subdirectory names.</p></li>
<li><p><strong>infer_dictionary</strong> (<em>bool</em><em>, </em><em>default False</em>) – When inferring a schema for partition fields, yield dictionary
encoded types instead of plain types. This can be more efficient
when materializing virtual columns, and Expressions parsed by the
finished Partitioning will include dictionaries of all unique
inspected values for each field.</p></li>
<li><p><strong>max_partition_dictionary_size</strong> (<em>int</em><em>, </em><em>default 0</em>) – Synonymous with infer_dictionary for backwards compatibility with
1.0: setting this to -1 or None is equivalent to passing
infer_dictionary=True.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>DirectoryPartitioningFactory</em> – To be used in the FileSystemFactoryOptions.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.dataset.DirectoryPartitioning.parse">
<code class="sig-name descname">parse</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.dataset.DirectoryPartitioning.parse" title="Permalink to this definition"></a></dt>
<dd><p>Parse a path into a partition expression.</p>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.dataset.DirectoryPartitioning.schema">
<code class="sig-name descname">schema</code><a class="headerlink" href="#pyarrow.dataset.DirectoryPartitioning.schema" title="Permalink to this definition"></a></dt>
<dd><p>The arrow Schema attached to the partitioning.</p>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="pyarrow.dataset.HivePartitioning.html" class="btn btn-neutral float-right" title="pyarrow.dataset.HivePartitioning" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="pyarrow.dataset.PartitioningFactory.html" class="btn btn-neutral float-left" title="pyarrow.dataset.PartitioningFactory" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2016-2019 Apache Software Foundation
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Registered as a jQuery DOM-ready callback: turns on the theme's navigation
// behaviour. NOTE(review): the meaning of the `true` argument is defined in
// theme.js (presumably sticky navigation) — confirm there.
jQuery(function initThemeNavigation() {
  var themeNav = SphinxRtdTheme.Navigation;
  themeNav.enable(true);
});
</script>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script></body>
</html>