blob: 3b77260177e13c1855eb27251851d7c692ebede3 [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Integration to Apache Arrow • Arrow R Package</title>
<!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.4.0/cosmo/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous">
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css">
<script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
<!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet">
<script src="pkgdown.js"></script><script src="extra.js"></script><meta property="og:title" content="Integration to Apache Arrow">
<meta property="og:description" content="Apache Arrow &lt;https://arrow.apache.org/&gt; is a cross-language
development platform for in-memory data. It specifies a standardized
language-independent columnar memory format for flat and hierarchical data,
organized for efficient analytic operations on modern hardware. This
package provides an interface to the Arrow C++ library.">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<!-- Matomo -->
<script>
/* Matomo analytics bootstrap: queue tracker commands on the shared
   window._paq array, then load the tracker script asynchronously. */
var _paq = window._paq = window._paq || [];
/* Configuration commands (for example "setCustomDimension") have to be queued ahead of "trackPageView". */
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  var matomoBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', matomoBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);
  /* Build the loader element and place it ahead of the first script element in the document. */
  var trackerTag = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  trackerTag.async = true;
  trackerTag.src = matomoBase + 'matomo.js';
  firstScript.parentNode.insertBefore(trackerTag, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-spy="scroll" data-target="#toc">
<div class="container template-home">
<header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">Arrow R Package</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">5.0.0</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="https://arrow.apache.org/">❯❯❯</a>
</li>
<li>
<a href="articles/arrow.html">Get started</a>
</li>
<li>
<a href="reference/index.html">Reference</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
Articles
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="articles/install.html">Installing the Arrow Package on Linux</a>
</li>
<li>
<a href="articles/dataset.html">Working with Arrow Datasets and dplyr</a>
</li>
<li>
<a href="articles/fs.html">Working with Cloud Storage (S3)</a>
</li>
<li>
<a href="articles/python.html">Apache Arrow in Python and R with reticulate</a>
</li>
<li>
<a href="articles/flight.html">Connecting to Flight RPC Servers</a>
</li>
<li>
<a href="articles/developing.html">Arrow R Developer Guide</a>
</li>
</ul>
</li>
<li>
<a href="news/index.html">Changelog</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
Project docs
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="https://arrow.apache.org/docs/format/README.html">Specification</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/c_glib">C GLib</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/cpp">C++</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/java">Java</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/js">JavaScript</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/python">Python</a>
</li>
<li>
<a href="index.html">R</a>
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right"></ul>
</div>
<!--/.nav-collapse -->
</div>
<!--/.container -->
</div>
<!--/.navbar -->
</header><div class="row">
<div class="contents col-md-9">
<div id="arrow" class="section level1">
<div class="page-header"><h1 class="hasAnchor">
<a href="#arrow" class="anchor"></a>arrow</h1></div>
<p><strong><a href="https://arrow.apache.org/">Apache Arrow</a> is a cross-language development platform for in-memory data.</strong> It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming messaging and interprocess communication.</p>
<p><strong>The <code>arrow</code> package exposes an interface to the Arrow C++ library, enabling access to many of its features in R.</strong> It provides low-level access to the Arrow C++ library API and higher-level access through a <code>dplyr</code> backend and familiar R functions.</p>
<div id="what-can-the-arrow-package-do" class="section level2">
<h2 class="hasAnchor">
<a href="#what-can-the-arrow-package-do" class="anchor"></a>What can the <code>arrow</code> package do?</h2>
<ul>
<li>Read and write <strong>Parquet files</strong> (<code><a href="reference/read_parquet.html">read_parquet()</a></code>, <code><a href="reference/write_parquet.html">write_parquet()</a></code>), an efficient and widely used columnar format</li>
<li>Read and write <strong>Feather files</strong> (<code><a href="reference/read_feather.html">read_feather()</a></code>, <code><a href="reference/write_feather.html">write_feather()</a></code>), a format optimized for speed and interoperability</li>
<li>Analyze, process, and write <strong>multi-file, larger-than-memory datasets</strong> (<code><a href="reference/open_dataset.html">open_dataset()</a></code>, <code><a href="reference/write_dataset.html">write_dataset()</a></code>)</li>
<li>Read <strong>large CSV and JSON files</strong> with excellent <strong>speed and efficiency</strong> (<code><a href="reference/read_delim_arrow.html">read_csv_arrow()</a></code>, <code><a href="reference/read_json_arrow.html">read_json_arrow()</a></code>)</li>
<li>Manipulate and analyze Arrow data with <strong><code>dplyr</code> verbs</strong>
</li>
<li>Read and write files in <strong>Amazon S3</strong> buckets with no additional function calls</li>
<li>Exercise <strong>fine control over column types</strong> for seamless interoperability with databases and data warehouse systems</li>
<li>Use <strong>compression codecs</strong> including Snappy, gzip, Brotli, Zstandard, LZ4, LZO, and bzip2 for reading and writing data</li>
<li>Enable <strong>zero-copy data sharing</strong> between <strong>R and Python</strong>
</li>
<li>Connect to <strong>Arrow Flight</strong> RPC servers to send and receive large datasets over networks</li>
<li>Access and manipulate Arrow objects through <strong>low-level bindings</strong> to the C++ library</li>
<li>Provide a <strong>toolkit for building connectors</strong> to other applications and services that use Arrow</li>
</ul>
</div>
<div id="installation" class="section level2">
<h2 class="hasAnchor">
<a href="#installation" class="anchor"></a>Installation</h2>
<div id="installing-the-latest-release-version" class="section level3">
<h3 class="hasAnchor">
<a href="#installing-the-latest-release-version" class="anchor"></a>Installing the latest release version</h3>
<p>Install the latest release of <code>arrow</code> from CRAN with</p>
<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"arrow"</span><span class="op">)</span></code></pre></div>
<p>Conda users can install <code>arrow</code> from conda-forge with</p>
<pre class="shell"><code>conda install -c conda-forge --strict-channel-priority r-arrow</code></pre>
<p>Installing a released version of the <code>arrow</code> package requires no additional system dependencies. For macOS and Windows, CRAN hosts binary packages that contain the Arrow C++ library. On Linux, source package installation will also build necessary C++ dependencies. For a faster, more complete installation, set the environment variable <code>NOT_CRAN=true</code>. See <code><a href="articles/install.html">vignette("install", package = "arrow")</a></code> for details.</p>
</div>
<div id="installing-a-development-version" class="section level3">
<h3 class="hasAnchor">
<a href="#installing-a-development-version" class="anchor"></a>Installing a development version</h3>
<p>Development versions of the package (binary and source) are built nightly and hosted at <a href="https://arrow-r-nightly.s3.amazonaws.com" class="uri">https://arrow-r-nightly.s3.amazonaws.com</a>. To install from there:</p>
<div class="sourceCode" id="cb3"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"arrow"</span>, repos <span class="op">=</span> <span class="st">"https://arrow-r-nightly.s3.amazonaws.com"</span><span class="op">)</span></code></pre></div>
<p>Conda users can install <code>arrow</code> nightly builds with</p>
<pre class="shell"><code>conda install -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow</code></pre>
<p>If you already have a version of <code>arrow</code> installed, you can switch to the latest nightly development version with</p>
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="fu">arrow</span><span class="fu">::</span><span class="fu"><a href="reference/install_arrow.html">install_arrow</a></span><span class="op">(</span>nightly <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></code></pre></div>
<p>These nightly package builds are not official Apache releases and are not recommended for production use. They may be useful for testing bug fixes and new features under active development.</p>
</div>
</div>
<div id="usage" class="section level2">
<h2 class="hasAnchor">
<a href="#usage" class="anchor"></a>Usage</h2>
<p>Among the many applications of the <code>arrow</code> package, two of the most accessible are:</p>
<ul>
<li>High-performance reading and writing of data files with multiple file formats and compression codecs, including built-in support for cloud storage</li>
<li>Analyzing and manipulating bigger-than-memory data with <code>dplyr</code> verbs</li>
</ul>
<p>The sections below describe these two uses and illustrate them with basic examples. They refer to two Arrow data structures:</p>
<ul>
<li>
<code>Table</code>: a tabular, column-oriented data structure capable of storing and processing large amounts of data more efficiently than R’s built-in <code>data.frame</code> and with SQL-like column data types that afford better interoperability with databases and data warehouse systems</li>
<li>
<code>Dataset</code>: a data structure functionally similar to <code>Table</code> but with the capability to work on larger-than-memory data partitioned across multiple files</li>
</ul>
<div id="reading-and-writing-data-files-with-arrow" class="section level3">
<h3 class="hasAnchor">
<a href="#reading-and-writing-data-files-with-arrow" class="anchor"></a>Reading and writing data files with <code>arrow</code>
</h3>
<p>The <code>arrow</code> package provides functions for reading single data files in several common formats. By default, calling any of these functions returns an R <code>data.frame</code>. To return an Arrow <code>Table</code>, set argument <code>as_data_frame = FALSE</code>.</p>
<ul>
<li>
<code><a href="reference/read_parquet.html">read_parquet()</a></code>: read a file in Parquet format</li>
<li>
<code><a href="reference/read_feather.html">read_feather()</a></code>: read a file in Feather format (the Apache Arrow IPC format)</li>
<li>
<code><a href="reference/read_delim_arrow.html">read_delim_arrow()</a></code>: read a delimited text file (default delimiter is comma)</li>
<li>
<code><a href="reference/read_delim_arrow.html">read_csv_arrow()</a></code>: read a comma-separated values (CSV) file</li>
<li>
<code><a href="reference/read_delim_arrow.html">read_tsv_arrow()</a></code>: read a tab-separated values (TSV) file</li>
<li>
<code><a href="reference/read_json_arrow.html">read_json_arrow()</a></code>: read a JSON data file</li>
</ul>
<p>For writing data to single files, the <code>arrow</code> package provides the functions <code><a href="reference/write_parquet.html">write_parquet()</a></code> and <code><a href="reference/write_feather.html">write_feather()</a></code>. These can be used with R <code>data.frame</code> and Arrow <code>Table</code> objects.</p>
<p>For example, let’s write the Star Wars characters data that’s included in <code>dplyr</code> to a Parquet file, then read it back in. Parquet is a popular choice for storing analytic data; it is optimized for reduced file sizes and fast read performance, especially for column-based access patterns. Parquet is widely supported by many tools and platforms.</p>
<p>First load the <code>arrow</code> and <code>dplyr</code> packages:</p>
<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span><span class="op">(</span><span class="va"><a href="https://github.com/apache/arrow/">arrow</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span>
<span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span><span class="op">(</span><span class="va"><a href="https://dplyr.tidyverse.org">dplyr</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></code></pre></div>
<p>Then write the <code>data.frame</code> named <code>starwars</code> to a Parquet file at <code>file_path</code>:</p>
<div class="sourceCode" id="cb7"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html">tempfile</a></span><span class="op">(</span><span class="op">)</span>
<span class="fu"><a href="reference/write_parquet.html">write_parquet</a></span><span class="op">(</span><span class="va">starwars</span>, <span class="va">file_path</span><span class="op">)</span></code></pre></div>
<p>Then read the Parquet file into an R <code>data.frame</code> named <code>sw</code>:</p>
<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">sw</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span><span class="op">)</span></code></pre></div>
<p>R object attributes are preserved when writing data to Parquet or Feather files and when reading those files back into R. This enables round-trip writing and reading of <code>sf::sf</code> objects, R <code>data.frame</code>s with <code>haven::labelled</code> columns, and <code>data.frame</code>s with other custom attributes.</p>
<p>For reading and writing larger files or sets of multiple files, <code>arrow</code> defines <code>Dataset</code> objects and provides the functions <code><a href="reference/open_dataset.html">open_dataset()</a></code> and <code><a href="reference/write_dataset.html">write_dataset()</a></code>, which enable analysis and processing of bigger-than-memory data, including the ability to partition data into smaller chunks without loading the full data into memory. For examples of these functions, see <code><a href="articles/dataset.html">vignette("dataset", package = "arrow")</a></code>.</p>
<p>All these functions can read and write files in the local filesystem or in Amazon S3 (by passing S3 URIs beginning with <code>s3://</code>). For more details, see <code><a href="articles/fs.html">vignette("fs", package = "arrow")</a></code>.</p>
</div>
<div id="using-dplyr-with-arrow" class="section level3">
<h3 class="hasAnchor">
<a href="#using-dplyr-with-arrow" class="anchor"></a>Using <code>dplyr</code> with <code>arrow</code>
</h3>
<p>The <code>arrow</code> package provides a <code>dplyr</code> backend enabling manipulation of Arrow tabular data with <code>dplyr</code> verbs. To use it, first load both packages <code>arrow</code> and <code>dplyr</code>. Then load data into an Arrow <code>Table</code> or <code>Dataset</code> object. For example, read the Parquet file written in the previous example into an Arrow <code>Table</code> named <code>sw</code>:</p>
<div class="sourceCode" id="cb9"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">sw</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span>, as_data_frame <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></code></pre></div>
<p>Next, pipe on <code>dplyr</code> verbs:</p>
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">result</span> <span class="op">&lt;-</span> <span class="va">sw</span> <span class="op">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span><span class="op">(</span><span class="va">homeworld</span> <span class="op">==</span> <span class="st">"Tatooine"</span><span class="op">)</span> <span class="op">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/rename.html">rename</a></span><span class="op">(</span>height_cm <span class="op">=</span> <span class="va">height</span>, mass_kg <span class="op">=</span> <span class="va">mass</span><span class="op">)</span> <span class="op">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span><span class="op">(</span>height_in <span class="op">=</span> <span class="va">height_cm</span> <span class="op">/</span> <span class="fl">2.54</span>, mass_lbs <span class="op">=</span> <span class="va">mass_kg</span> <span class="op">*</span> <span class="fl">2.2046</span><span class="op">)</span> <span class="op">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/arrange.html">arrange</a></span><span class="op">(</span><span class="fu"><a href="https://dplyr.tidyverse.org/reference/desc.html">desc</a></span><span class="op">(</span><span class="va">birth_year</span><span class="op">)</span><span class="op">)</span> <span class="op">%&gt;%</span>
<span class="fu"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span><span class="op">(</span><span class="va">name</span>, <span class="va">height_in</span>, <span class="va">mass_lbs</span><span class="op">)</span></code></pre></div>
<p>The <code>arrow</code> package uses lazy evaluation to delay computation until the result is required. This speeds up processing by enabling the Arrow C++ library to perform multiple computations in one operation. <code>result</code> is an object with class <code>arrow_dplyr_query</code> which represents all the computations to be performed:</p>
<div class="sourceCode" id="cb11"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">result</span>
<span class="co">#&gt; Table (query)</span>
<span class="co">#&gt; name: string</span>
<span class="co">#&gt; height_in: expr</span>
<span class="co">#&gt; mass_lbs: expr</span>
<span class="co">#&gt;</span>
<span class="co">#&gt; * Filter: equal(homeworld, "Tatooine")</span>
<span class="co">#&gt; * Sorted by birth_year [desc]</span>
<span class="co">#&gt; See $.data for the source Arrow object</span></code></pre></div>
<p>To perform these computations and materialize the result, call <code><a href="https://dplyr.tidyverse.org/reference/compute.html">compute()</a></code> or <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code>. <code><a href="https://dplyr.tidyverse.org/reference/compute.html">compute()</a></code> returns an Arrow <code>Table</code>, suitable for passing to other <code>arrow</code> or <code>dplyr</code> functions:</p>
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">result</span> <span class="op">%&gt;%</span> <span class="fu"><a href="https://dplyr.tidyverse.org/reference/compute.html">compute</a></span><span class="op">(</span><span class="op">)</span>
<span class="co">#&gt; Table</span>
<span class="co">#&gt; 10 rows x 3 columns</span>
<span class="co">#&gt; $name &lt;string&gt;</span>
<span class="co">#&gt; $height_in &lt;double&gt;</span>
<span class="co">#&gt; $mass_lbs &lt;double&gt;</span></code></pre></div>
<p><code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code> returns an R <code>data.frame</code>, suitable for viewing or passing to other R functions for analysis or visualization:</p>
<div class="sourceCode" id="cb13"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">result</span> <span class="op">%&gt;%</span> <span class="fu"><a href="https://dplyr.tidyverse.org/reference/compute.html">collect</a></span><span class="op">(</span><span class="op">)</span>
<span class="co">#&gt; # A tibble: 10 x 3</span>
<span class="co">#&gt; name height_in mass_lbs</span>
<span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt;</span>
<span class="co">#&gt; 1 C-3PO 65.7 165.</span>
<span class="co">#&gt; 2 Cliegg Lars 72.0 NA </span>
<span class="co">#&gt; 3 Shmi Skywalker 64.2 NA </span>
<span class="co">#&gt; 4 Owen Lars 70.1 265.</span>
<span class="co">#&gt; 5 Beru Whitesun lars 65.0 165.</span>
<span class="co">#&gt; 6 Darth Vader 79.5 300.</span>
<span class="co">#&gt; 7 Anakin Skywalker 74.0 185.</span>
<span class="co">#&gt; 8 Biggs Darklighter 72.0 185.</span>
<span class="co">#&gt; 9 Luke Skywalker 67.7 170.</span>
<span class="co">#&gt; 10 R5-D4 38.2 70.5</span></code></pre></div>
<p>The <code>arrow</code> package works with most single-table <code>dplyr</code> verbs except those that compute aggregates, such as <code><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarise()</a></code> and <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> after <code><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by()</a></code>. Inside <code>dplyr</code> verbs, Arrow offers support for many functions and operators, with common functions mapped to their base R and tidyverse equivalents. The <a href="https://arrow.apache.org/docs/r/news/index.html">changelog</a> lists many of them. If there are additional functions you would like to see implemented, please file an issue as described in the <a href="#getting-help">Getting help</a> section below.</p>
<p>For <code>dplyr</code> queries on <code>Table</code> objects, if the <code>arrow</code> package detects an unimplemented function within a <code>dplyr</code> verb, it automatically calls <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code> to return the data as an R <code>data.frame</code> before processing that <code>dplyr</code> verb. For queries on <code>Dataset</code> objects (which can be larger than memory), it raises an error if the function is unimplemented; you need to explicitly tell it to <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code>.</p>
</div>
<div id="additional-features" class="section level3">
<h3 class="hasAnchor">
<a href="#additional-features" class="anchor"></a>Additional features</h3>
<p>Other applications of <code>arrow</code> are described in the following vignettes:</p>
<ul>
<li>
<code><a href="articles/python.html">vignette("python", package = "arrow")</a></code>: use <code>arrow</code> and <code>reticulate</code> to pass data between R and Python</li>
<li>
<code><a href="articles/flight.html">vignette("flight", package = "arrow")</a></code>: connect to Arrow Flight RPC servers to send and receive data</li>
<li>
<code><a href="articles/arrow.html">vignette("arrow", package = "arrow")</a></code>: access and manipulate Arrow objects through low-level bindings to the C++ library</li>
</ul>
</div>
</div>
<div id="getting-help" class="section level2">
<h2 class="hasAnchor">
<a href="#getting-help" class="anchor"></a>Getting help</h2>
<p>If you encounter a bug, please file an issue with a minimal reproducible example on the <a href="https://issues.apache.org/jira/projects/ARROW/issues">Apache Jira issue tracker</a>. Create an account or log in, then click <strong>Create</strong> to file an issue. Select the project <strong>Apache Arrow (ARROW)</strong>, select the component <strong>R</strong>, and begin the issue summary with <strong><code>[R]</code></strong> followed by a space. For more information, see the <strong>Report bugs and propose features</strong> section of the <a href="https://arrow.apache.org/docs/developers/contributing.html">Contributing to Apache Arrow</a> page in the Arrow developer documentation.</p>
<p>We welcome questions, discussion, and contributions from users of the <code>arrow</code> package. For information about mailing lists and other venues for engaging with the Arrow developer and user communities, please see the <a href="https://arrow.apache.org/community/">Apache Arrow Community</a> page.</p>
<hr>
<p>All participation in the Apache Arrow project is governed by the Apache Software Foundation’s <a href="https://www.apache.org/foundation/policies/conduct.html">code of conduct</a>.</p>
</div>
</div>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<div class="links">
<h2>Links</h2>
<ul class="list-unstyled">
<li>Download from CRAN at <br><a href="https://cloud.r-project.org/package=arrow">https://​cloud.r-project.org/​package=arrow</a>
</li>
<li>Report a bug at <br><a href="https://issues.apache.org/jira/projects/ARROW/issues">https://​issues.apache.org/​jira/​projects/​ARROW/​issues</a>
</li>
</ul>
</div>
<div class="license">
<h2>License</h2>
<ul class="list-unstyled">
<li>Apache License (&gt;= 2.0)</li>
</ul>
</div>
<div class="developers">
<h2>Developers</h2>
<ul class="list-unstyled">
<li>Neal Richardson <br><small class="roles"> Author, maintainer </small> </li>
<li>Ian Cook <br><small class="roles"> Author </small> </li>
<li>Nic Crane <br><small class="roles"> Author </small> </li>
<li>Jonathan Keane <br><small class="roles"> Author </small> </li>
<li>Romain François <br><small class="roles"> Author </small> <a href="https://orcid.org/0000-0002-2444-4226" target="orcid.widget" aria-label="ORCID"><span class="fab fa-orcid orcid" aria-hidden="true"></span></a> </li>
<li>Jeroen Ooms <br><small class="roles"> Author </small> </li>
<li>Apache Arrow <br><small class="roles"> Author, copyright holder </small> </li>
<li><a href="authors.html">All authors...</a></li>
</ul>
</div>
<div class="dev-status">
<h2>Dev status</h2>
<ul class="list-unstyled">
<li><a href="https://cran.r-project.org/package=arrow"><img src="https://www.r-pkg.org/badges/version-last-release/arrow" alt="cran"></a></li>
<li><a href="https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amaster+event%3Apush"><img src="https://github.com/apache/arrow/workflows/R/badge.svg?event=push" alt="CI"></a></li>
<li><a href="https://anaconda.org/conda-forge/r-arrow"><img src="https://img.shields.io/conda/vn/conda-forge/r-arrow.svg" alt="conda-forge"></a></li>
</ul>
</div>
</div>
</div>
<footer><div class="copyright">
<p>Developed by Neal Richardson, Ian Cook, Nic Crane, Jonathan Keane, Romain François, Jeroen Ooms, Apache Arrow.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
</div>
</footer>
</div>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body>
</html>