<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>pyarrow.RecordBatch &mdash; Apache Arrow v2.0.0</title>
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/language_data.js"></script>
<script type="text/javascript" src="../../_static/js/theme.js"></script>
<link rel="canonical" href="https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatch.html" />
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="pyarrow.Table" href="pyarrow.Table.html" />
<link rel="prev" title="pyarrow.ChunkedArray" href="pyarrow.ChunkedArray.html" />
<!-- Matomo -->
<script>
// Matomo analytics bootstrap: queue tracker commands on the shared `_paq`
// array, then inject the tracker script asynchronously.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  // Base URL used for both the tracking endpoint and the tracker script.
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Create the loader <script> and insert it before the first script on the page.
  var doc = document;
  var loader = doc.createElement('script');
  var firstScript = doc.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home" title="Documentation Home"> Apache Arrow
</a>
<div class="version">
2.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Specifications and Protocols</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../format/Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Integration.html">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/CDataInterface.html">The Arrow C data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../format/Other.html">Other Data Structures</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../status.html">Implementation Status</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/c_glib/">C/GLib</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../cpp/index.html">C++</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">C#</a></li>
<li class="toctree-l1"><a class="reference external" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../java/index.html">Java</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/js/">JavaScript</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">MATLAB</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">Python</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../install.html">Installing PyArrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../memory.html">Memory and IO Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data.html">Data Types and In-Memory Data Model</a></li>
<li class="toctree-l2"><a class="reference internal" href="../compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ipc.html">Streaming, Serialization, and IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../filesystems.html">Filesystem Interface</a></li>
<li class="toctree-l2"><a class="reference internal" href="../filesystems_deprecated.html">Filesystem Interface (legacy)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../plasma.html">The Plasma In-Memory Object Store</a></li>
<li class="toctree-l2"><a class="reference internal" href="../numpy.html">NumPy Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pandas.html">Pandas Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../timestamps.html">Timestamps</a></li>
<li class="toctree-l2"><a class="reference internal" href="../csv.html">Reading CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../feather.html">Feather File Format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../parquet.html">Reading and Writing the Apache Parquet Format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cuda.html">CUDA Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extending_types.html">Extending pyarrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extending.html">Using pyarrow from C++ and Cython Code</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="../api.html">API Reference</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../api/datatypes.html">Data Types and Schemas</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/arrays.html">Arrays and Scalars</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/memory.html">Buffers and Memory</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/compute.html">Compute Functions</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/files.html">Streams and File Access</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../api/tables.html">Tables and Tensors</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../api/tables.html#factory-functions">Factory Functions</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="../api/tables.html#classes">Classes</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/tables.html#tensors">Tensors</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/ipc.html">Serialization and IPC</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/flight.html">Arrow Flight</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/formats.html">Tabular File Formats</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/filesystems.html">Filesystems</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/dataset.html">Dataset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/plasma.html">Plasma In-Memory Object Store</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/cuda.html">CUDA Integration</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/misc.html">Miscellaneous</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../getting_involved.html">Getting Involved</a></li>
<li class="toctree-l2"><a class="reference internal" href="../benchmarks.html">Benchmarks</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/r/">R</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">Ruby</a></li>
<li class="toctree-l1"><a class="reference external" href="https://docs.rs/crate/arrow/">Rust</a></li>
</ul>
<p class="caption"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../developers/contributing.html">Contributing to Apache Arrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/cpp/index.html">C++ Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/python.html">Python Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/archery.html">Daily Development using Archery</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/crossbow.html">Packaging and Testing with Crossbow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/docker.html">Running Docker Builds</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../developers/documentation.html">Building the Documentation</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">Apache Arrow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a> &raquo;</li>
<li><a href="../index.html">Python bindings</a> &raquo;</li>
<li><a href="../api.html">API Reference</a> &raquo;</li>
<li><a href="../api/tables.html">Tables and Tensors</a> &raquo;</li>
<li>pyarrow.RecordBatch</li>
<li class="wy-breadcrumbs-aside">
<a href="../../_sources/python/generated/pyarrow.RecordBatch.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="pyarrow-recordbatch">
<h1>pyarrow.RecordBatch<a class="headerlink" href="#pyarrow-recordbatch" title="Permalink to this headline"></a></h1>
<dl class="py class">
<dt id="pyarrow.RecordBatch">
<em class="property">class </em><code class="sig-prename descclassname">pyarrow.</code><code class="sig-name descname">RecordBatch</code><a class="headerlink" href="#pyarrow.RecordBatch" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.lib._PandasConvertible</span></code></p>
<p>Batch of rows of columns of equal length</p>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>Do not call this class’s constructor directly, use one of the
<code class="docutils literal notranslate"><span class="pre">RecordBatch.from_*</span></code> functions instead.</p>
</div>
<dl class="py method">
<dt id="pyarrow.RecordBatch.__init__">
<code class="sig-name descname">__init__</code><span class="sig-paren">(</span><em class="sig-param"><span class="o">*</span><span class="n">args</span></em>, <em class="sig-param"><span class="o">**</span><span class="n">kwargs</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.__init__" title="Permalink to this definition"></a></dt>
<dd><p>Initialize self. See help(type(self)) for accurate signature.</p>
</dd></dl>
<p class="rubric">Methods</p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.__init__" title="pyarrow.RecordBatch.__init__"><code class="xref py py-obj docutils literal notranslate"><span class="pre">__init__</span></code></a>(*args, **kwargs)</p></td>
<td><p>Initialize self.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.column" title="pyarrow.RecordBatch.column"><code class="xref py py-obj docutils literal notranslate"><span class="pre">column</span></code></a>(self, i)</p></td>
<td><p>Select single column from record batch</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.equals" title="pyarrow.RecordBatch.equals"><code class="xref py py-obj docutils literal notranslate"><span class="pre">equals</span></code></a>(self, other, bool check_metadata=False)</p></td>
<td><p>Check if contents of two record batches are equal.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.filter" title="pyarrow.RecordBatch.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">filter</span></code></a>(self, Array mask[, …])</p></td>
<td><p>Select records from a record batch.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.from_arrays" title="pyarrow.RecordBatch.from_arrays"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_arrays</span></code></a>(list arrays[, names, schema, …])</p></td>
<td><p>Construct a RecordBatch from multiple pyarrow.Arrays</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.from_pandas" title="pyarrow.RecordBatch.from_pandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_pandas</span></code></a>(type cls, df, Schema schema=None)</p></td>
<td><p>Convert pandas.DataFrame to an Arrow RecordBatch</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.from_struct_array" title="pyarrow.RecordBatch.from_struct_array"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_struct_array</span></code></a>(StructArray struct_array)</p></td>
<td><p>Construct a RecordBatch from a StructArray.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.replace_schema_metadata" title="pyarrow.RecordBatch.replace_schema_metadata"><code class="xref py py-obj docutils literal notranslate"><span class="pre">replace_schema_metadata</span></code></a>(self[, metadata])</p></td>
<td><p>EXPERIMENTAL: Create shallow copy of record batch by replacing schema key-value metadata with the indicated new metadata (which may be None, which deletes any existing metadata).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.serialize" title="pyarrow.RecordBatch.serialize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">serialize</span></code></a>(self[, memory_pool])</p></td>
<td><p>Write RecordBatch to Buffer as encapsulated IPC message.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.slice" title="pyarrow.RecordBatch.slice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">slice</span></code></a>(self[, offset, length])</p></td>
<td><p>Compute zero-copy slice of this RecordBatch</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.take" title="pyarrow.RecordBatch.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">take</span></code></a>(self, indices)</p></td>
<td><p>Select records from a RecordBatch.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.to_pandas" title="pyarrow.RecordBatch.to_pandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_pandas</span></code></a>(self[, memory_pool, categories, …])</p></td>
<td><p>Convert to a pandas-compatible NumPy array or DataFrame, as appropriate</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.to_pydict" title="pyarrow.RecordBatch.to_pydict"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_pydict</span></code></a>(self)</p></td>
<td><p>Convert the RecordBatch to a dict or OrderedDict.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.to_string" title="pyarrow.RecordBatch.to_string"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_string</span></code></a>(self[, show_metadata])</p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.validate" title="pyarrow.RecordBatch.validate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">validate</span></code></a>(self, *[, full])</p></td>
<td><p>Perform validation checks.</p></td>
</tr>
</tbody>
</table>
<p class="rubric">Attributes</p>
<table class="longtable docutils align-default">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.columns" title="pyarrow.RecordBatch.columns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">columns</span></code></a></p></td>
<td><p>List of all columns in numerical order</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.nbytes" title="pyarrow.RecordBatch.nbytes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">nbytes</span></code></a></p></td>
<td><p>Total number of bytes consumed by the elements of the record batch.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.num_columns" title="pyarrow.RecordBatch.num_columns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">num_columns</span></code></a></p></td>
<td><p>Number of columns</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.num_rows" title="pyarrow.RecordBatch.num_rows"><code class="xref py py-obj docutils literal notranslate"><span class="pre">num_rows</span></code></a></p></td>
<td><p>Number of rows</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyarrow.RecordBatch.schema" title="pyarrow.RecordBatch.schema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">schema</span></code></a></p></td>
<td><p>Schema of the RecordBatch and its columns</p></td>
</tr>
</tbody>
</table>
<dl class="py method">
<dt id="pyarrow.RecordBatch.column">
<code class="sig-name descname">column</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">i</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.column" title="Permalink to this definition"></a></dt>
<dd><p>Select single column from record batch</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><strong>column</strong> (<em>pyarrow.Array</em>)</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.RecordBatch.columns">
<code class="sig-name descname">columns</code><a class="headerlink" href="#pyarrow.RecordBatch.columns" title="Permalink to this definition"></a></dt>
<dd><p>List of all columns in numerical order</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><em>list of pa.Array</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.equals">
<code class="sig-name descname">equals</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">other</em>, <em class="sig-param">bool check_metadata=False</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.equals" title="Permalink to this definition"></a></dt>
<dd><p>Check if contents of two record batches are equal.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>other</strong> (<a class="reference internal" href="#pyarrow.RecordBatch" title="pyarrow.RecordBatch"><em>pyarrow.RecordBatch</em></a>) – RecordBatch to compare against.</p></li>
<li><p><strong>check_metadata</strong> (<em>bool</em><em>, </em><em>default False</em>) – Whether schema metadata equality should be checked as well.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>are_equal</strong> (<em>bool</em>)</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.filter">
<code class="sig-name descname">filter</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">Array mask</em>, <em class="sig-param">null_selection_behavior=u'drop'</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.filter" title="Permalink to this definition"></a></dt>
<dd><p>Select records from a record batch. See pyarrow.compute.filter for full
usage.</p>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.from_arrays">
<em class="property">static </em><code class="sig-name descname">from_arrays</code><span class="sig-paren">(</span><em class="sig-param">list arrays</em>, <em class="sig-param">names=None</em>, <em class="sig-param">schema=None</em>, <em class="sig-param">metadata=None</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.from_arrays" title="Permalink to this definition"></a></dt>
<dd><p>Construct a RecordBatch from multiple pyarrow.Arrays</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>arrays</strong> (<em>list of pyarrow.Array</em>) – One for each field in RecordBatch</p></li>
<li><p><strong>names</strong> (<em>list of str</em><em>, </em><em>optional</em>) – Names for the batch fields. If not passed, schema must be passed</p></li>
<li><p><strong>schema</strong> (<a class="reference internal" href="pyarrow.Schema.html#pyarrow.Schema" title="pyarrow.Schema"><em>Schema</em></a><em>, </em><em>default None</em>) – Schema for the created batch. If not passed, names must be passed</p></li>
<li><p><strong>metadata</strong> (<em>dict</em><em> or </em><em>Mapping</em><em>, </em><em>default None</em>) – Optional metadata for the schema (if inferred).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>pyarrow.RecordBatch</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.from_pandas">
<code class="sig-name descname">from_pandas</code><span class="sig-paren">(</span><em class="sig-param">type cls</em>, <em class="sig-param">df</em>, <em class="sig-param">Schema schema=None</em>, <em class="sig-param">preserve_index=None</em>, <em class="sig-param">nthreads=None</em>, <em class="sig-param">columns=None</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.from_pandas" title="Permalink to this definition"></a></dt>
<dd><p>Convert pandas.DataFrame to an Arrow RecordBatch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>df</strong> (<em>pandas.DataFrame</em>) – </p></li>
<li><p><strong>schema</strong> (<a class="reference internal" href="pyarrow.Schema.html#pyarrow.Schema" title="pyarrow.Schema"><em>pyarrow.Schema</em></a><em>, </em><em>optional</em>) – The expected schema of the RecordBatch. This can be used to
indicate the type of columns if we cannot infer it automatically.
If passed, the output will have exactly this schema. Columns
specified in the schema that are not found in the DataFrame columns
or its index will raise an error. Additional columns or index
levels in the DataFrame which are not specified in the schema will
be ignored.</p></li>
<li><p><strong>preserve_index</strong> (<em>bool</em><em>, </em><em>optional</em>) – Whether to store the index as an additional column in the resulting
<code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code>. The default of None will store the index as a
column, except for RangeIndex which is stored as metadata only. Use
<code class="docutils literal notranslate"><span class="pre">preserve_index=True</span></code> to force it to be stored as a column.</p></li>
<li><p><strong>nthreads</strong> (<em>int</em><em>, </em><em>default None</em><em> (</em><em>may use up to system CPU count threads</em><em>)</em>) – If greater than 1, convert columns to Arrow in parallel using
indicated number of threads</p></li>
<li><p><strong>columns</strong> (<em>list</em><em>, </em><em>optional</em>) – List of columns to be converted. If None, use all columns.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>pyarrow.RecordBatch</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.from_struct_array">
<em class="property">static </em><code class="sig-name descname">from_struct_array</code><span class="sig-paren">(</span><em class="sig-param">StructArray struct_array</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.from_struct_array" title="Permalink to this definition"></a></dt>
<dd><p>Construct a RecordBatch from a StructArray.</p>
<p>Each field in the StructArray will become a column in the resulting
<code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>struct_array</strong> (<a class="reference internal" href="pyarrow.StructArray.html#pyarrow.StructArray" title="pyarrow.StructArray"><em>StructArray</em></a>) – Array to construct the record batch from.</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>pyarrow.RecordBatch</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.RecordBatch.nbytes">
<code class="sig-name descname">nbytes</code><a class="headerlink" href="#pyarrow.RecordBatch.nbytes" title="Permalink to this definition"></a></dt>
<dd><p>Total number of bytes consumed by the elements of the record batch.</p>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.RecordBatch.num_columns">
<code class="sig-name descname">num_columns</code><a class="headerlink" href="#pyarrow.RecordBatch.num_columns" title="Permalink to this definition"></a></dt>
<dd><p>Number of columns</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><em>int</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.RecordBatch.num_rows">
<code class="sig-name descname">num_rows</code><a class="headerlink" href="#pyarrow.RecordBatch.num_rows" title="Permalink to this definition"></a></dt>
<dd><p>Number of rows</p>
<p>Due to the definition of a RecordBatch, all columns have the same
number of rows.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><em>int</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.replace_schema_metadata">
<code class="sig-name descname">replace_schema_metadata</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">metadata</span><span class="o">=</span><span class="default_value">None</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.replace_schema_metadata" title="Permalink to this definition"></a></dt>
<dd><p>EXPERIMENTAL: Create shallow copy of record batch by replacing schema
key-value metadata with the indicated new metadata (which may be None,
which deletes any existing metadata).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>metadata</strong> (<em>dict</em><em>, </em><em>default None</em>) – </p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>shallow_copy</strong> (<em>RecordBatch</em>)</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt id="pyarrow.RecordBatch.schema">
<code class="sig-name descname">schema</code><a class="headerlink" href="#pyarrow.RecordBatch.schema" title="Permalink to this definition"></a></dt>
<dd><p>Schema of the RecordBatch and its columns</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><em>pyarrow.Schema</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.serialize">
<code class="sig-name descname">serialize</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">memory_pool</span><span class="o">=</span><span class="default_value">None</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.serialize" title="Permalink to this definition"></a></dt>
<dd><p>Write RecordBatch to Buffer as encapsulated IPC message.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>memory_pool</strong> (<a class="reference internal" href="pyarrow.MemoryPool.html#pyarrow.MemoryPool" title="pyarrow.MemoryPool"><em>MemoryPool</em></a><em>, </em><em>default None</em>) – Uses default memory pool if not specified</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>serialized</strong> (<em>Buffer</em>)</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.slice">
<code class="sig-name descname">slice</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">offset</span><span class="o">=</span><span class="default_value">0</span></em>, <em class="sig-param"><span class="n">length</span><span class="o">=</span><span class="default_value">None</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.slice" title="Permalink to this definition"></a></dt>
<dd><p>Compute zero-copy slice of this RecordBatch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>offset</strong> (<em>int</em><em>, </em><em>default 0</em>) – Offset from start of record batch to slice</p></li>
<li><p><strong>length</strong> (<em>int</em><em>, </em><em>default None</em>) – Length of slice (default is until end of batch starting from
offset)</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>sliced</strong> (<em>RecordBatch</em>)</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.take">
<code class="sig-name descname">take</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">indices</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.take" title="Permalink to this definition"></a></dt>
<dd><p>Select records from a RecordBatch. See pyarrow.compute.take for full
usage.</p>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.to_pandas">
<code class="sig-name descname">to_pandas</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">memory_pool=None</em>, <em class="sig-param">categories=None</em>, <em class="sig-param">bool strings_to_categorical=False</em>, <em class="sig-param">bool zero_copy_only=False</em>, <em class="sig-param">bool integer_object_nulls=False</em>, <em class="sig-param">bool date_as_object=True</em>, <em class="sig-param">bool timestamp_as_object=False</em>, <em class="sig-param">bool use_threads=True</em>, <em class="sig-param">bool deduplicate_objects=True</em>, <em class="sig-param">bool ignore_metadata=False</em>, <em class="sig-param">bool safe=True</em>, <em class="sig-param">bool split_blocks=False</em>, <em class="sig-param">bool self_destruct=False</em>, <em class="sig-param">types_mapper=None</em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.to_pandas" title="Permalink to this definition"></a></dt>
<dd><p>Convert to a pandas-compatible NumPy array or DataFrame, as appropriate</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>memory_pool</strong> (<a class="reference internal" href="pyarrow.MemoryPool.html#pyarrow.MemoryPool" title="pyarrow.MemoryPool"><em>MemoryPool</em></a><em>, </em><em>default None</em>) – Arrow MemoryPool to use for allocations. Uses the default memory
pool if not passed.</p></li>
<li><p><strong>strings_to_categorical</strong> (<em>bool</em><em>, </em><em>default False</em>) – Encode string (UTF8) and binary types to pandas.Categorical.</p></li>
<li><p><strong>categories</strong> (<em>list</em><em>, </em><em>default empty</em>) – List of fields that should be returned as pandas.Categorical. Only
applies to table-like data structures.</p></li>
<li><p><strong>zero_copy_only</strong> (<em>bool</em><em>, </em><em>default False</em>) – Raise an ArrowException if this function call would require copying
the underlying data.</p></li>
<li><p><strong>integer_object_nulls</strong> (<em>bool</em><em>, </em><em>default False</em>) – Cast integers with nulls to objects</p></li>
<li><p><strong>date_as_object</strong> (<em>bool</em><em>, </em><em>default True</em>) – Cast dates to objects. If False, convert to datetime64[ns] dtype.</p></li>
<li><p><strong>timestamp_as_object</strong> (<em>bool</em><em>, </em><em>default False</em>) – Cast non-nanosecond timestamps (np.datetime64) to objects. This is
useful if you have timestamps that don’t fit in the normal date
range of nanosecond timestamps (1678 CE-2262 CE).
If False, all timestamps are converted to datetime64[ns] dtype.</p></li>
<li><p><strong>use_threads</strong> (<em>bool</em><em>, </em><em>default True</em>) – Whether to parallelize the conversion using multiple threads.</p></li>
<li><p><strong>deduplicate_objects</strong> (<em>bool</em><em>, </em><em>default True</em>) – Do not create multiple copies of Python objects when created, to save
on memory use. Conversion will be slower.</p></li>
<li><p><strong>ignore_metadata</strong> (<em>bool</em><em>, </em><em>default False</em>) – If True, do not use the ‘pandas’ metadata to reconstruct the
DataFrame index, if present</p></li>
<li><p><strong>safe</strong> (<em>bool</em><em>, </em><em>default True</em>) – For certain data types, a cast is needed in order to store the
data in a pandas DataFrame or Series (e.g. timestamps are always
stored as nanoseconds in pandas). This option controls whether it
is a safe cast or not.</p></li>
<li><p><strong>split_blocks</strong> (<em>bool</em><em>, </em><em>default False</em>) – If True, generate one internal “block” for each column when
creating a pandas.DataFrame from a RecordBatch or Table. While this
can temporarily reduce memory use, note that various pandas operations
can trigger “consolidation”, which may balloon memory use.</p></li>
<li><p><strong>self_destruct</strong> (<em>bool</em><em>, </em><em>default False</em>) – EXPERIMENTAL: If True, attempt to deallocate the originating Arrow
memory while converting the Arrow object to pandas. If you use the
object after calling to_pandas with this option it will crash your
program.</p></li>
<li><p><strong>types_mapper</strong> (<em>function</em><em>, </em><em>default None</em>) – A function mapping a pyarrow DataType to a pandas ExtensionDtype.
This can be used to override the default pandas type for conversion
of built-in pyarrow types or in absence of pandas_metadata in the
Table schema. The function receives a pyarrow DataType and is
expected to return a pandas ExtensionDtype or <code class="docutils literal notranslate"><span class="pre">None</span></code> if the
default conversion should be used for that type. If you have
a dictionary mapping, you can pass <code class="docutils literal notranslate"><span class="pre">dict.get</span></code> as function.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><em>pandas.Series or pandas.DataFrame depending on type of object</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.to_pydict">
<code class="sig-name descname">to_pydict</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.to_pydict" title="Permalink to this definition"></a></dt>
<dd><p>Convert the RecordBatch to a dict or OrderedDict.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><em>dict</em></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.to_string">
<code class="sig-name descname">to_string</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="n">show_metadata</span><span class="o">=</span><span class="default_value">False</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.to_string" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt id="pyarrow.RecordBatch.validate">
<code class="sig-name descname">validate</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">self</span></em>, <em class="sig-param"><span class="o">*</span></em>, <em class="sig-param"><span class="n">full</span><span class="o">=</span><span class="default_value">False</span></em><span class="sig-paren">)</span><a class="headerlink" href="#pyarrow.RecordBatch.validate" title="Permalink to this definition"></a></dt>
<dd><p>Perform validation checks. An exception is raised if validation fails.</p>
<p>By default only cheap validation checks are run. Pass <code class="docutils literal notranslate"><span class="pre">full=True</span></code>
for thorough validation checks (potentially O(n)).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>full</strong> (<em>bool</em><em>, </em><em>default False</em>) – If True, run expensive checks, otherwise cheap checks only.</p>
</dd>
<dt class="field-even">Raises</dt>
<dd class="field-even"><p><strong>ArrowInvalid</strong></p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="pyarrow.Table.html" class="btn btn-neutral float-right" title="pyarrow.Table" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="pyarrow.ChunkedArray.html" class="btn btn-neutral float-left" title="pyarrow.ChunkedArray" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2016-2019 Apache Software Foundation
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script></body>
</html>