| <!DOCTYPE html> |
| <!-- Generated by pkgdown: do not edit by hand --><html lang="en"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Integration to Apache Arrow • Arrow R Package</title> |
| <!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.4.0/cosmo/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous"> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css"> |
| <script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"> |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"> |
| <!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet"> |
| <script src="pkgdown.js"></script><script src="extra.js"></script><meta property="og:title" content="Integration to Apache Arrow"> |
| <meta property="og:description" content="Apache Arrow <https://arrow.apache.org/> is a cross-language |
| development platform for in-memory data. It specifies a standardized |
| language-independent columnar memory format for flat and hierarchical data, |
| organized for efficient analytic operations on modern hardware. This |
| package provides an interface to the Arrow C++ library."> |
| <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]> |
| <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script> |
| <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> |
| <![endif]-->
|
|
|
| <!-- Matomo -->
|
| <script>
|
| /* Matomo analytics bootstrap: commands are queued on the global _paq list, reusing window._paq when it already exists. The queue order below is intentional and must be kept. */
|
| var _paq = window._paq = window._paq || [];
|
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
|
| _paq.push(["setDoNotTrack", true]);
| _paq.push(["disableCookies"]);
| _paq.push(["trackPageView"]);
| _paq.push(["enableLinkTracking"]);
|
| /* Queue the tracker endpoint and site id, then insert an async script element for matomo.js in front of the first script element of the document. */
|
| (function() {
|
| var trackerBase = "https://analytics.apache.org/";
|
| _paq.push(["setTrackerUrl", trackerBase + "matomo.php"]);
| _paq.push(["setSiteId", "20"]);
|
| var doc = document;
| var loader = doc.createElement("script");
| var firstScript = doc.getElementsByTagName("script")[0];
|
| loader.async = true;
| loader.src = trackerBase + "matomo.js";
| firstScript.parentNode.insertBefore(loader, firstScript);
|
| })();
|
| </script>
|
| <!-- End Matomo Code -->
|
| |
| </head> |
| <body data-spy="scroll" data-target="#toc"> |
| <div class="container template-home"> |
| <header><div class="navbar navbar-default navbar-fixed-top" role="navigation"> |
| <div class="container"> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false"> |
| <span class="sr-only">Toggle navigation</span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <span class="navbar-brand"> |
| <a class="navbar-link" href="index.html">Arrow R Package</a> |
| <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">5.0.0</span> |
| </span> |
| </div> |
| |
| <div id="navbar" class="navbar-collapse collapse"> |
| <ul class="nav navbar-nav"> |
| <li> |
| <a href="https://arrow.apache.org/">❯❯❯</a> |
| </li> |
| <li> |
| <a href="articles/arrow.html">Get started</a> |
| </li> |
| <li> |
| <a href="reference/index.html">Reference</a> |
| </li> |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"> |
| Articles |
| |
| <span class="caret"></span> |
| </a> |
| <ul class="dropdown-menu" role="menu"> |
| <li> |
| <a href="articles/install.html">Installing the Arrow Package on Linux</a> |
| </li> |
| <li> |
| <a href="articles/dataset.html">Working with Arrow Datasets and dplyr</a> |
| </li> |
| <li> |
| <a href="articles/fs.html">Working with Cloud Storage (S3)</a> |
| </li> |
| <li> |
| <a href="articles/python.html">Apache Arrow in Python and R with reticulate</a> |
| </li> |
| <li> |
| <a href="articles/flight.html">Connecting to Flight RPC Servers</a> |
| </li> |
| <li> |
| <a href="articles/developing.html">Arrow R Developer Guide</a> |
| </li> |
| </ul> |
| </li> |
| <li> |
| <a href="news/index.html">Changelog</a> |
| </li> |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"> |
| Project docs |
| |
| <span class="caret"></span> |
| </a> |
| <ul class="dropdown-menu" role="menu"> |
| <li> |
| <a href="https://arrow.apache.org/docs/format/README.html">Specification</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/c_glib">C GLib</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/cpp">C++</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/java">Java</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/js">JavaScript</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/python">Python</a> |
| </li> |
| <li> |
| <a href="index.html">R</a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <ul class="nav navbar-nav navbar-right"></ul> |
| </div> |
| <!--/.nav-collapse --> |
| </div> |
| <!--/.container --> |
| </div> |
| <!--/.navbar --> |
| |
| |
| |
| </header><div class="row"> |
| <div class="contents col-md-9"> |
| <div id="arrow" class="section level1"> |
| <div class="page-header"><h1 class="hasAnchor"> |
| <a href="#arrow" class="anchor"></a>arrow</h1></div> |
| |
| <p><strong><a href="https://arrow.apache.org/">Apache Arrow</a> is a cross-language development platform for in-memory data.</strong> It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming messaging and interprocess communication.</p> |
| <p><strong>The <code>arrow</code> package exposes an interface to the Arrow C++ library, enabling access to many of its features in R.</strong> It provides low-level access to the Arrow C++ library API and higher-level access through a <code>dplyr</code> backend and familiar R functions.</p> |
| <div id="what-can-the-arrow-package-do" class="section level2"> |
| <h2 class="hasAnchor"> |
| <a href="#what-can-the-arrow-package-do" class="anchor"></a>What can the <code>arrow</code> package do?</h2> |
| <ul> |
| <li>Read and write <strong>Parquet files</strong> (<code><a href="reference/read_parquet.html">read_parquet()</a></code>, <code><a href="reference/write_parquet.html">write_parquet()</a></code>), an efficient and widely used columnar format</li> |
| <li>Read and write <strong>Feather files</strong> (<code><a href="reference/read_feather.html">read_feather()</a></code>, <code><a href="reference/write_feather.html">write_feather()</a></code>), a format optimized for speed and interoperability</li> |
| <li>Analyze, process, and write <strong>multi-file, larger-than-memory datasets</strong> (<code><a href="reference/open_dataset.html">open_dataset()</a></code>, <code><a href="reference/write_dataset.html">write_dataset()</a></code>)</li> |
| <li>Read <strong>large CSV and JSON files</strong> with excellent <strong>speed and efficiency</strong> (<code><a href="reference/read_delim_arrow.html">read_csv_arrow()</a></code>, <code><a href="reference/read_json_arrow.html">read_json_arrow()</a></code>)</li> |
| <li>Manipulate and analyze Arrow data with <strong><code>dplyr</code> verbs</strong> |
| </li> |
| <li>Read and write files in <strong>Amazon S3</strong> buckets with no additional function calls</li> |
| <li>Exercise <strong>fine control over column types</strong> for seamless interoperability with databases and data warehouse systems</li> |
| <li>Use <strong>compression codecs</strong> including Snappy, gzip, Brotli, Zstandard, LZ4, LZO, and bzip2 for reading and writing data</li> |
| <li>Enable <strong>zero-copy data sharing</strong> between <strong>R and Python</strong> |
| </li> |
| <li>Connect to <strong>Arrow Flight</strong> RPC servers to send and receive large datasets over networks</li> |
| <li>Access and manipulate Arrow objects through <strong>low-level bindings</strong> to the C++ library</li> |
| <li>Provide a <strong>toolkit for building connectors</strong> to other applications and services that use Arrow</li> |
| </ul> |
| </div> |
| <div id="installation" class="section level2"> |
| <h2 class="hasAnchor"> |
| <a href="#installation" class="anchor"></a>Installation</h2> |
| <div id="installing-the-latest-release-version" class="section level3"> |
| <h3 class="hasAnchor"> |
| <a href="#installing-the-latest-release-version" class="anchor"></a>Installing the latest release version</h3> |
| <p>Install the latest release of <code>arrow</code> from CRAN with</p> |
| <div class="sourceCode" id="cb1"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"arrow"</span><span class="op">)</span></code></pre></div> |
| <p>Conda users can install <code>arrow</code> from conda-forge with</p> |
| <pre class="shell"><code>conda install -c conda-forge --strict-channel-priority r-arrow</code></pre> |
| <p>Installing a released version of the <code>arrow</code> package requires no additional system dependencies. For macOS and Windows, CRAN hosts binary packages that contain the Arrow C++ library. On Linux, source package installation will also build necessary C++ dependencies. For a faster, more complete installation, set the environment variable <code>NOT_CRAN=true</code>. See <code><a href="articles/install.html">vignette("install", package = "arrow")</a></code> for details.</p> |
| </div> |
| <div id="installing-a-development-version" class="section level3"> |
| <h3 class="hasAnchor"> |
| <a href="#installing-a-development-version" class="anchor"></a>Installing a development version</h3> |
| <p>Development versions of the package (binary and source) are built nightly and hosted at <a href="https://arrow-r-nightly.s3.amazonaws.com" class="uri">https://arrow-r-nightly.s3.amazonaws.com</a>. To install from there:</p> |
| <div class="sourceCode" id="cb3"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"arrow"</span>, repos <span class="op">=</span> <span class="st">"https://arrow-r-nightly.s3.amazonaws.com"</span><span class="op">)</span></code></pre></div> |
| <p>Conda users can install <code>arrow</code> nightly builds with</p> |
| <pre class="shell"><code>conda install -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow</code></pre> |
| <p>If you already have a version of <code>arrow</code> installed, you can switch to the latest nightly development version with</p> |
| <div class="sourceCode" id="cb5"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="fu">arrow</span><span class="fu">::</span><span class="fu"><a href="reference/install_arrow.html">install_arrow</a></span><span class="op">(</span>nightly <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></code></pre></div> |
| <p>These nightly package builds are not official Apache releases and are not recommended for production use. They may be useful for testing bug fixes and new features under active development.</p> |
| </div> |
| </div> |
| <div id="usage" class="section level2"> |
| <h2 class="hasAnchor"> |
| <a href="#usage" class="anchor"></a>Usage</h2> |
| <p>Among the many applications of the <code>arrow</code> package, two of the most accessible are:</p> |
| <ul> |
| <li>High-performance reading and writing of data files with multiple file formats and compression codecs, including built-in support for cloud storage</li> |
| <li>Analyzing and manipulating bigger-than-memory data with <code>dplyr</code> verbs</li> |
| </ul> |
| <p>The sections below describe these two uses and illustrate them with basic examples. The sections below mention two Arrow data structures:</p> |
| <ul> |
| <li> |
| <code>Table</code>: a tabular, column-oriented data structure capable of storing and processing large amounts of data more efficiently than R’s built-in <code>data.frame</code> and with SQL-like column data types that afford better interoperability with databases and data warehouse systems</li> |
| <li> |
| <code>Dataset</code>: a data structure functionally similar to <code>Table</code> but with the capability to work on larger-than-memory data partitioned across multiple files</li> |
| </ul> |
| <div id="reading-and-writing-data-files-with-arrow" class="section level3"> |
| <h3 class="hasAnchor"> |
| <a href="#reading-and-writing-data-files-with-arrow" class="anchor"></a>Reading and writing data files with <code>arrow</code> |
| </h3> |
| <p>The <code>arrow</code> package provides functions for reading single data files in several common formats. By default, calling any of these functions returns an R <code>data.frame</code>. To return an Arrow <code>Table</code>, set argument <code>as_data_frame = FALSE</code>.</p> |
| <ul> |
| <li> |
| <code><a href="reference/read_parquet.html">read_parquet()</a></code>: read a file in Parquet format</li> |
| <li> |
| <code><a href="reference/read_feather.html">read_feather()</a></code>: read a file in Feather format (the Apache Arrow IPC format)</li> |
| <li> |
| <code><a href="reference/read_delim_arrow.html">read_delim_arrow()</a></code>: read a delimited text file (default delimiter is comma)</li> |
| <li> |
| <code><a href="reference/read_delim_arrow.html">read_csv_arrow()</a></code>: read a comma-separated values (CSV) file</li> |
| <li> |
| <code><a href="reference/read_delim_arrow.html">read_tsv_arrow()</a></code>: read a tab-separated values (TSV) file</li> |
| <li> |
| <code><a href="reference/read_json_arrow.html">read_json_arrow()</a></code>: read a JSON data file</li> |
| </ul> |
| <p>For writing data to single files, the <code>arrow</code> package provides the functions <code><a href="reference/write_parquet.html">write_parquet()</a></code> and <code><a href="reference/write_feather.html">write_feather()</a></code>. These can be used with R <code>data.frame</code> and Arrow <code>Table</code> objects.</p> |
| <p>For example, let’s write the Star Wars characters data that’s included in <code>dplyr</code> to a Parquet file, then read it back in. Parquet is a popular choice for storing analytic data; it is optimized for reduced file sizes and fast read performance, especially for column-based access patterns. Parquet is widely supported by many tools and platforms.</p> |
| <p>First load the <code>arrow</code> and <code>dplyr</code> packages:</p> |
| <div class="sourceCode" id="cb6"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span><span class="op">(</span><span class="va"><a href="https://github.com/apache/arrow/">arrow</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span> |
| <span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span><span class="op">(</span><span class="va"><a href="https://dplyr.tidyverse.org">dplyr</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></code></pre></div> |
| <p>Then write the <code>data.frame</code> named <code>starwars</code> to a Parquet file at <code>file_path</code>:</p> |
| <div class="sourceCode" id="cb7"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html">tempfile</a></span><span class="op">(</span><span class="op">)</span> |
| <span class="fu"><a href="reference/write_parquet.html">write_parquet</a></span><span class="op">(</span><span class="va">starwars</span>, <span class="va">file_path</span><span class="op">)</span></code></pre></div> |
| <p>Then read the Parquet file into an R <code>data.frame</code> named <code>sw</code>:</p> |
| <div class="sourceCode" id="cb8"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">sw</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span><span class="op">)</span></code></pre></div> |
| <p>R object attributes are preserved when writing data to Parquet or Feather files and when reading those files back into R. This enables round-trip writing and reading of <code>sf::sf</code> objects, R <code>data.frame</code>s with <code>haven::labelled</code> columns, and <code>data.frame</code>s with other custom attributes.</p> |
| <p>For reading and writing larger files or sets of multiple files, <code>arrow</code> defines <code>Dataset</code> objects and provides the functions <code><a href="reference/open_dataset.html">open_dataset()</a></code> and <code><a href="reference/write_dataset.html">write_dataset()</a></code>, which enable analysis and processing of bigger-than-memory data, including the ability to partition data into smaller chunks without loading the full data into memory. For examples of these functions, see <code><a href="articles/dataset.html">vignette("dataset", package = "arrow")</a></code>.</p> |
| <p>All these functions can read and write files in the local filesystem or in Amazon S3 (by passing S3 URIs beginning with <code>s3://</code>). For more details, see <code><a href="articles/fs.html">vignette("fs", package = "arrow")</a></code>.</p> |
| </div> |
| <div id="using-dplyr-with-arrow" class="section level3"> |
| <h3 class="hasAnchor"> |
| <a href="#using-dplyr-with-arrow" class="anchor"></a>Using <code>dplyr</code> with <code>arrow</code> |
| </h3> |
| <p>The <code>arrow</code> package provides a <code>dplyr</code> backend enabling manipulation of Arrow tabular data with <code>dplyr</code> verbs. To use it, first load both packages <code>arrow</code> and <code>dplyr</code>. Then load data into an Arrow <code>Table</code> or <code>Dataset</code> object. For example, read the Parquet file written in the previous example into an Arrow <code>Table</code> named <code>sw</code>:</p> |
| <div class="sourceCode" id="cb9"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">sw</span> <span class="op">&lt;-</span> <span class="fu"><a href="reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span>, as_data_frame <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></code></pre></div> |
| <p>Next, pipe on <code>dplyr</code> verbs:</p> |
| <div class="sourceCode" id="cb10"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">result</span> <span class="op">&lt;-</span> <span class="va">sw</span> <span class="op">%>%</span> |
| <span class="fu"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span><span class="op">(</span><span class="va">homeworld</span> <span class="op">==</span> <span class="st">"Tatooine"</span><span class="op">)</span> <span class="op">%>%</span> |
| <span class="fu"><a href="https://dplyr.tidyverse.org/reference/rename.html">rename</a></span><span class="op">(</span>height_cm <span class="op">=</span> <span class="va">height</span>, mass_kg <span class="op">=</span> <span class="va">mass</span><span class="op">)</span> <span class="op">%>%</span> |
| <span class="fu"><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate</a></span><span class="op">(</span>height_in <span class="op">=</span> <span class="va">height_cm</span> <span class="op">/</span> <span class="fl">2.54</span>, mass_lbs <span class="op">=</span> <span class="va">mass_kg</span> <span class="op">*</span> <span class="fl">2.2046</span><span class="op">)</span> <span class="op">%>%</span> |
| <span class="fu"><a href="https://dplyr.tidyverse.org/reference/arrange.html">arrange</a></span><span class="op">(</span><span class="fu"><a href="https://dplyr.tidyverse.org/reference/desc.html">desc</a></span><span class="op">(</span><span class="va">birth_year</span><span class="op">)</span><span class="op">)</span> <span class="op">%>%</span> |
| <span class="fu"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span><span class="op">(</span><span class="va">name</span>, <span class="va">height_in</span>, <span class="va">mass_lbs</span><span class="op">)</span></code></pre></div> |
| <p>The <code>arrow</code> package uses lazy evaluation to delay computation until the result is required. This speeds up processing by enabling the Arrow C++ library to perform multiple computations in one operation. <code>result</code> is an object with class <code>arrow_dplyr_query</code> which represents all the computations to be performed:</p> |
| <div class="sourceCode" id="cb11"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">result</span> |
| <span class="co">#> Table (query)</span> |
| <span class="co">#> name: string</span> |
| <span class="co">#> height_in: expr</span> |
| <span class="co">#> mass_lbs: expr</span> |
| <span class="co">#></span> |
| <span class="co">#> * Filter: equal(homeworld, "Tatooine")</span> |
| <span class="co">#> * Sorted by birth_year [desc]</span> |
| <span class="co">#> See $.data for the source Arrow object</span></code></pre></div> |
| <p>To perform these computations and materialize the result, call <code><a href="https://dplyr.tidyverse.org/reference/compute.html">compute()</a></code> or <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code>. <code><a href="https://dplyr.tidyverse.org/reference/compute.html">compute()</a></code> returns an Arrow <code>Table</code>, suitable for passing to other <code>arrow</code> or <code>dplyr</code> functions:</p> |
| <div class="sourceCode" id="cb12"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">result</span> <span class="op">%>%</span> <span class="fu"><a href="https://dplyr.tidyverse.org/reference/compute.html">compute</a></span><span class="op">(</span><span class="op">)</span> |
| <span class="co">#> Table</span> |
| <span class="co">#> 10 rows x 3 columns</span> |
| <span class="co">#> $name &lt;string&gt;</span> |
| <span class="co">#> $height_in &lt;double&gt;</span> |
| <span class="co">#> $mass_lbs &lt;double&gt;</span></code></pre></div> |
| <p><code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code> returns an R <code>data.frame</code>, suitable for viewing or passing to other R functions for analysis or visualization:</p> |
| <div class="sourceCode" id="cb13"><pre class="downlit sourceCode r"> |
| <code class="sourceCode R"><span class="va">result</span> <span class="op">%>%</span> <span class="fu"><a href="https://dplyr.tidyverse.org/reference/compute.html">collect</a></span><span class="op">(</span><span class="op">)</span> |
| <span class="co">#> # A tibble: 10 x 3</span> |
| <span class="co">#> name height_in mass_lbs</span> |
| <span class="co">#> &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt;</span> |
| <span class="co">#> 1 C-3PO 65.7 165.</span> |
| <span class="co">#> 2 Cliegg Lars 72.0 NA </span> |
| <span class="co">#> 3 Shmi Skywalker 64.2 NA </span> |
| <span class="co">#> 4 Owen Lars 70.1 265.</span> |
| <span class="co">#> 5 Beru Whitesun lars 65.0 165.</span> |
| <span class="co">#> 6 Darth Vader 79.5 300.</span> |
| <span class="co">#> 7 Anakin Skywalker 74.0 185.</span> |
| <span class="co">#> 8 Biggs Darklighter 72.0 185.</span> |
| <span class="co">#> 9 Luke Skywalker 67.7 170.</span> |
| <span class="co">#> 10 R5-D4 38.2 70.5</span></code></pre></div> |
| <p>The <code>arrow</code> package works with most single-table <code>dplyr</code> verbs except those that compute aggregates, such as <code><a href="https://dplyr.tidyverse.org/reference/summarise.html">summarise()</a></code> and <code><a href="https://dplyr.tidyverse.org/reference/mutate.html">mutate()</a></code> after <code><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by()</a></code>. Inside <code>dplyr</code> verbs, Arrow offers support for many functions and operators, with common functions mapped to their base R and tidyverse equivalents. The <a href="https://arrow.apache.org/docs/r/news/index.html">changelog</a> lists many of them. If there are additional functions you would like to see implemented, please file an issue as described in the <a href="#getting-help">Getting help</a> section below.</p> |
| <p>For <code>dplyr</code> queries on <code>Table</code> objects, if the <code>arrow</code> package detects an unimplemented function within a <code>dplyr</code> verb, it automatically calls <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code> to return the data as an R <code>data.frame</code> before processing that <code>dplyr</code> verb. For queries on <code>Dataset</code> objects (which can be larger than memory), it raises an error if the function is unimplemented; you need to explicitly tell it to <code><a href="https://dplyr.tidyverse.org/reference/compute.html">collect()</a></code>.</p> |
| </div> |
| <div id="additional-features" class="section level3"> |
| <h3 class="hasAnchor"> |
| <a href="#additional-features" class="anchor"></a>Additional features</h3> |
| <p>Other applications of <code>arrow</code> are described in the following vignettes:</p> |
| <ul> |
| <li> |
| <code><a href="articles/python.html">vignette("python", package = "arrow")</a></code>: use <code>arrow</code> and <code>reticulate</code> to pass data between R and Python</li> |
| <li> |
| <code><a href="articles/flight.html">vignette("flight", package = "arrow")</a></code>: connect to Arrow Flight RPC servers to send and receive data</li> |
| <li> |
| <code><a href="articles/arrow.html">vignette("arrow", package = "arrow")</a></code>: access and manipulate Arrow objects through low-level bindings to the C++ library</li> |
| </ul> |
| </div> |
| </div> |
| <div id="getting-help" class="section level2"> |
| <h2 class="hasAnchor"> |
| <a href="#getting-help" class="anchor"></a>Getting help</h2> |
| <p>If you encounter a bug, please file an issue with a minimal reproducible example on the <a href="https://issues.apache.org/jira/projects/ARROW/issues">Apache Jira issue tracker</a>. Create an account or log in, then click <strong>Create</strong> to file an issue. Select the project <strong>Apache Arrow (ARROW)</strong>, select the component <strong>R</strong>, and begin the issue summary with <strong><code>[R]</code></strong> followed by a space. For more information, see the <strong>Report bugs and propose features</strong> section of the <a href="https://arrow.apache.org/docs/developers/contributing.html">Contributing to Apache Arrow</a> page in the Arrow developer documentation.</p> |
| <p>We welcome questions, discussion, and contributions from users of the <code>arrow</code> package. For information about mailing lists and other venues for engaging with the Arrow developer and user communities, please see the <a href="https://arrow.apache.org/community/">Apache Arrow Community</a> page.</p> |
| <hr> |
| <p>All participation in the Apache Arrow project is governed by the Apache Software Foundation’s <a href="https://www.apache.org/foundation/policies/conduct.html">code of conduct</a>.</p> |
| </div> |
| </div> |
| </div> |
| |
| <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar"> |
| <div class="links"> |
| <h2>Links</h2> |
| <ul class="list-unstyled"> |
| <li>Download from CRAN at <br><a href="https://cloud.r-project.org/package=arrow">https://cloud.r-project.org/package=arrow</a> |
| </li> |
| <li>Report a bug at <br><a href="https://issues.apache.org/jira/projects/ARROW/issues">https://issues.apache.org/jira/projects/ARROW/issues</a> |
| </li> |
| </ul> |
| </div> |
| <div class="license"> |
| <h2>License</h2> |
| <ul class="list-unstyled"> |
| <li>Apache License (>= 2.0)</li> |
| </ul> |
| </div> |
| <div class="developers"> |
| <h2>Developers</h2> |
| <ul class="list-unstyled"> |
| <li>Neal Richardson <br><small class="roles"> Author, maintainer </small> </li> |
| <li>Ian Cook <br><small class="roles"> Author </small> </li> |
| <li>Nic Crane <br><small class="roles"> Author </small> </li> |
| <li>Jonathan Keane <br><small class="roles"> Author </small> </li> |
| <li>Romain François <br><small class="roles"> Author </small> <a href="https://orcid.org/0000-0002-2444-4226" target="orcid.widget" aria-label="ORCID"><span class="fab fa-orcid orcid" aria-hidden="true"></span></a> </li> |
| <li>Jeroen Ooms <br><small class="roles"> Author </small> </li> |
| <li>Apache Arrow <br><small class="roles"> Author, copyright holder </small> </li> |
| <li><a href="authors.html">All authors...</a></li> |
| </ul> |
| </div> |
| |
| <div class="dev-status"> |
| <h2>Dev status</h2> |
| <ul class="list-unstyled"> |
| <li><a href="https://cran.r-project.org/package=arrow"><img src="https://www.r-pkg.org/badges/version-last-release/arrow" alt="cran"></a></li> |
| <li><a href="https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amaster+event%3Apush"><img src="https://github.com/apache/arrow/workflows/R/badge.svg?event=push" alt="CI"></a></li> |
| <li><a href="https://anaconda.org/conda-forge/r-arrow"><img src="https://img.shields.io/conda/vn/conda-forge/r-arrow.svg" alt="conda-forge"></a></li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| |
| |
| <footer><div class="copyright"> |
| <p>Developed by Neal Richardson, Ian Cook, Nic Crane, Jonathan Keane, Romain François, Jeroen Ooms, Apache Arrow.</p> |
| </div> |
| |
| <div class="pkgdown"> |
| <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p> |
| </div> |
| |
| </footer> |
| </div> |
| |
| |
| |
| |
| <script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body> |
| </html> |