<!-- blob: 9088ce575718eb802ce6bef2cc8d43c40f8b95d5 [file] [log] [blame] -->
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="description" content="Apache Arrow &lt;https://arrow.apache.org/&gt; is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the Arrow C++ library.">
<title>Arrow R Package • Arrow R Package</title>
<!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="apple-touch-icon.png">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="apple-touch-icon-60x60.png">
<script src="deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<link href="deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet">
<script src="deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
<!-- bootstrap-toc --><script src="https://cdn.jsdelivr.net/gh/afeld/bootstrap-toc@v1.0.1/dist/bootstrap-toc.min.js" integrity="sha256-4veVQbu7//Lk5TSmc7YV48MxtMy98e26cf5MrgZYnwo=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.11/clipboard.min.js" integrity="sha512-7O5pXpc0oCRrxk8RUfDYFgn0nO1t+jLuIOQdOMRp4APB7uZ4vSjspzp5y6YDtDs4VzUSTbWzBFZ/LKJhnyFOKw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="pkgdown.js"></script><link href="extra.css" rel="stylesheet">
<meta property="og:title" content="Arrow R Package">
<meta property="og:description" content="Apache Arrow &lt;https://arrow.apache.org/&gt; is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the Arrow C++ library.">
<meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png">
<meta property="og:image:alt" content="Apache Arrow logo, displaying the triple chevron image adjacent to the text">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:creator" content="@apachearrow">
<meta name="twitter:site" content="@apachearrow">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Matomo --><script>
/* Matomo analytics bootstrap: commands are queued on the shared global `_paq`,
   then the matomo.js loader is inserted ahead of the first script element. */
window._paq = window._paq || [];
var _paq = window._paq;
/* Any tracker configuration call (e.g. "setCustomDimension") has to be queued ahead of "trackPageView". */
/* Cookie tracking is switched off on purpose to avoid privacy issues. */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  /* Build the loader element and mark it async before it is attached. */
  var doc = document;
  var loader = doc.createElement('script');
  var firstScript = doc.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = baseUrl + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script><!-- End Matomo Code -->
</head>
<body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-black"><div class="container">
<a class="navbar-brand me-2" href="index.html">Arrow R Package</a>
<span class="version">
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">16.1.0.9000</small>
</span>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-3">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link" href="articles/arrow.html">Get started</a>
</li>
<li class="nav-item">
<a class="nav-link" href="reference/index.html">Reference</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<h6 class="dropdown-header" data-toc-skip>Using the package</h6>
<a class="dropdown-item" href="articles/read_write.html">Reading and writing data files</a>
<a class="dropdown-item" href="articles/data_wrangling.html">Data analysis with dplyr syntax</a>
<a class="dropdown-item" href="articles/dataset.html">Working with multi-file data sets</a>
<a class="dropdown-item" href="articles/python.html">Integrating Arrow, Python, and R</a>
<a class="dropdown-item" href="articles/fs.html">Using cloud storage (S3, GCS)</a>
<a class="dropdown-item" href="articles/flight.html">Connecting to a Flight server</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Arrow concepts</h6>
<a class="dropdown-item" href="articles/data_objects.html">Data objects</a>
<a class="dropdown-item" href="articles/data_types.html">Data types</a>
<a class="dropdown-item" href="articles/metadata.html">Metadata</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Installation</h6>
<a class="dropdown-item" href="articles/install.html">Installing on Linux</a>
<a class="dropdown-item" href="articles/install_nightly.html">Installing development versions</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="articles/index.html">More articles...</a>
</div>
</li>
<li class="nav-item">
<a class="nav-link" href="news/index.html">Changelog</a>
</li>
</ul>
<form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search site" name="search-input" data-search-index="search.json" id="search-input" placeholder="Search for" autocomplete="off">
</form>
<ul class="navbar-nav">
<li class="nav-item">
<a class="external-link nav-link" href="https://github.com/apache/arrow/" aria-label="github">
<span class="fab fa fab fa-github fa-lg"></span>
</a>
</li>
</ul>
</div>
</div>
</nav><div class="container template-home">
<div class="row">
<main id="main" class="col-md-9"><div class="section level1">
<div class="page-header"><h1 id="arrow-">arrow <img src="https://arrow.apache.org/img/arrow-logo_hex_black-txt_white-bg.png" align="right" alt="" width="120"><a class="anchor" aria-label="anchor" href="#arrow-"></a>
</h1></div>
<!-- badges: start -->
<!-- badges: end -->
<div class="section level2">
<h2 id="overview">Overview<a class="anchor" aria-label="anchor" href="#overview"></a>
</h2>
<p>The R <a href="https://github.com/apache/arrow/" class="external-link">arrow</a> package provides access to many of the features of the <a href="https://arrow.apache.org/docs/cpp/index.html" class="external-link">Apache Arrow C++ library</a> for R users. The goal of arrow is to provide an Arrow C++ backend to <a href="https://dplyr.tidyverse.org" class="external-link">dplyr</a>, and access to the Arrow C++ library through familiar base R and tidyverse functions, or <a href="https://r6.r-lib.org" class="external-link">R6</a> classes.</p>
<p>To learn more about the Apache Arrow project, see the parent documentation of the <a href="https://arrow.apache.org/" class="external-link">Arrow Project</a>. The Arrow project provides functionality for a wide range of data analysis tasks to store, process and move data fast. See the <a href="https://arrow.apache.org/docs/r/articles/read_write.html">read/write article</a> to learn about reading and writing data files, <a href="https://arrow.apache.org/docs/r/articles/data_wrangling.html">data wrangling</a> to learn how to use dplyr syntax with arrow objects, and the <a href="https://arrow.apache.org/docs/r/reference/acero.html">function documentation</a> for a full list of supported functions within dplyr queries.</p>
</div>
<div class="section level2">
<h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
</h2>
<p>The latest release of arrow can be installed from CRAN. In most cases installing the latest release should work without requiring any additional system dependencies, especially if you are using Windows or macOS.</p>
<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html" class="external-link">install.packages</a></span><span class="op">(</span><span class="st">"arrow"</span><span class="op">)</span></span></code></pre></div>
<p>Alternatively, if you are using conda you can install arrow from conda-forge:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode sh"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">conda</span> install <span class="at">-c</span> conda-forge <span class="at">--strict-channel-priority</span> r-arrow</span></code></pre></div>
<p>There are some special cases to note:</p>
<ul>
<li><p>On macOS, the R you use with Arrow should match the architecture of the machine you are using. If you’re using an ARM (aka M1, M2, etc.) processor, use R compiled for arm64. If you’re using an Intel-based Mac, use R compiled for x86. Using R and Arrow compiled for Intel-based Macs on an ARM-based Mac will result in segfaults and crashes.</p></li>
<li><p>On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the <a href="https://arrow.apache.org/docs/r/articles/install.html">installation guide</a>.</p></li>
<li><p>If you are compiling arrow from source, please note that as of version 10.0.0, arrow requires C++17 to build. This has implications on Windows and CentOS 7. For Windows users it means you need to be running an R version of 4.0 or later. On CentOS 7, it means you need to install a newer compiler than the default system compiler gcc. See the <a href="https://arrow.apache.org/docs/r/articles/developers/install_details.html">installation details article</a> for guidance.</p></li>
<li><p>Development versions of arrow are released nightly. For information on how to install nightly builds please see the <a href="https://arrow.apache.org/docs/r/articles/install_nightly.html">installing nightly builds</a> article.</p></li>
</ul>
</div>
<div class="section level2">
<h2 id="what-can-the-arrow-package-do">What can the arrow package do?<a class="anchor" aria-label="anchor" href="#what-can-the-arrow-package-do"></a>
</h2>
<p>The Arrow C++ library is composed of different parts, each of which serves a specific purpose. The arrow package provides bindings to the C++ functionality for a wide range of data analysis tasks.</p>
<p>It allows users to read and write data in a variety of formats:</p>
<ul>
<li>Read and write Parquet files, an efficient and widely used columnar format</li>
<li>Read and write Arrow (formerly known as Feather) files, a format optimized for speed and interoperability</li>
<li>Read and write CSV files with excellent speed and efficiency</li>
<li>Read and write multi-file and larger-than-memory datasets</li>
<li>Read JSON files</li>
</ul>
<p>It provides access to remote filesystems and servers:</p>
<ul>
<li>Read and write files in Amazon S3 and Google Cloud Storage buckets</li>
<li>Connect to Arrow Flight servers to transport large datasets over networks</li>
</ul>
<p>Additional features include:</p>
<ul>
<li>Manipulate and analyze Arrow data with dplyr verbs</li>
<li>Zero-copy data sharing between R and Python</li>
<li>Fine control over column types to work seamlessly with databases and data warehouses</li>
<li>Toolkit for building connectors to other applications and services that use Arrow</li>
</ul>
</div>
<div class="section level2">
<h2 id="what-is-apache-arrow">What is Apache Arrow?<a class="anchor" aria-label="anchor" href="#what-is-apache-arrow"></a>
</h2>
<p>Apache Arrow is a cross-language development platform for in-memory and larger-than-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming, messaging, and interprocess communication.</p>
<p>This package exposes an interface to the Arrow C++ library, enabling access to many of its features in R. It provides low-level access to the Arrow C++ library API and higher-level access through a dplyr backend and familiar R functions.</p>
</div>
<div class="section level2">
<h2 id="arrow-resources">Arrow resources<a class="anchor" aria-label="anchor" href="#arrow-resources"></a>
</h2>
<p>There are a few additional resources that you may find useful for getting started with arrow:</p>
<ul>
<li>The official <a href="https://arrow.apache.org/docs/r/">Arrow R package documentation</a>
</li>
<li><a href="https://github.com/apache/arrow/blob/-/r/cheatsheet/arrow-cheatsheet.pdf" class="external-link">Arrow for R cheatsheet</a></li>
<li><a href="https://arrow.apache.org/cookbook/r/index.html" class="external-link">Apache Arrow R Cookbook</a></li>
<li>R for Data Science <a href="https://r4ds.hadley.nz/arrow" class="external-link">Chapter on Arrow</a>
</li>
<li><a href="https://github.com/thisisnic/awesome-arrow-r" class="external-link">Awesome Arrow R</a></li>
</ul>
</div>
<div class="section level2">
<h2 id="getting-help">Getting help<a class="anchor" aria-label="anchor" href="#getting-help"></a>
</h2>
<p>We welcome questions, discussion, and contributions from users of the arrow package. For information about mailing lists and other venues for engaging with the Arrow developer and user communities, please see the <a href="https://arrow.apache.org/community/" class="external-link">Apache Arrow Community</a> page.</p>
<p>If you encounter a bug, please file an issue with a minimal reproducible example on <a href="https://github.com/apache/arrow/issues" class="external-link">GitHub issues</a>. Log in to your GitHub account, click on <strong>New issue</strong> and select the type of issue you want to create. Add a meaningful title prefixed with <strong><code>[R]</code></strong> followed by a space, the issue summary and select component <strong>R</strong> from the dropdown list. For more information, see the <strong>Report bugs and propose features</strong> section of the <a href="https://arrow.apache.org/docs/developers/#contributing" class="external-link">Contributing to Apache Arrow</a> page in the Arrow developer documentation.</p>
</div>
<div class="section level2">
<h2 id="code-of-conduct">Code of Conduct<a class="anchor" aria-label="anchor" href="#code-of-conduct"></a>
</h2>
<p>Please note that all participation in the Apache Arrow project is governed by the Apache Software Foundation’s <a href="https://www.apache.org/foundation/policies/conduct.html" class="external-link">code of conduct</a>.</p>
</div>
</div>
</main><aside class="col-md-3"><div class="arrow-project">
<h2 data-toc-skip>Arrow Project</h2>
<ul class="list-unstyled">
<li><p><a href="https://arrow.apache.org/" class="external-link">Homepage</a> <br><a href="https://arrow.apache.org/docs/format/Columnar.html" class="external-link">Specifications</a></p></li>
</ul>
</div>
<div class="links">
<h2 data-toc-skip>Links</h2>
<ul class="list-unstyled">
<li><a href="https://cloud.r-project.org/package=arrow" class="external-link">View on CRAN</a></li>
<li><a href="https://github.com/apache/arrow/" class="external-link">Browse source code</a></li>
<li><a href="https://github.com/apache/arrow/issues" class="external-link">Report a bug</a></li>
</ul>
</div>
<div class="license">
<h2 data-toc-skip>License</h2>
<ul class="list-unstyled">
<li>Apache License (&gt;= 2.0)</li>
</ul>
</div>
<div class="community">
<h2 data-toc-skip>Community</h2>
<ul class="list-unstyled">
<li><p><a href="https://www.apache.org/foundation/policies/conduct.html" class="external-link">Code of conduct</a></p></li>
</ul>
</div>
<div class="implementations">
<h2 data-toc-skip>Implementations</h2>
<ul class="list-unstyled">
<li><p><a href="https://arrow.apache.org/docs/c_glib" class="external-link">C GLib</a> <br><a href="https://arrow.apache.org/docs/cpp" class="external-link">C++</a> <br><a href="https://github.com/apache/arrow/blob/main/csharp/README.md" class="external-link">C#</a> <br><a href="https://pkg.go.dev/github.com/apache/arrow/go/v17" class="external-link">Go</a> <br><a href="https://arrow.apache.org/docs/java" class="external-link">Java</a> <br><a href="https://arrow.apache.org/docs/js" class="external-link">JavaScript</a> <br><a href="https://github.com/apache/arrow-julia/blob/main/README.md" class="external-link">Julia</a> <br><a href="https://github.com/apache/arrow/blob/main/matlab/README.md" class="external-link">MATLAB</a> <br><a href="https://arrow.apache.org/docs/python" class="external-link">Python</a> <br><a href="index.html">R</a> <br><a href="https://github.com/apache/arrow/blob/main/ruby/README.md" class="external-link">Ruby</a> <br><a href="https://docs.rs/crate/arrow/latest" class="external-link">Rust</a></p></li>
</ul>
</div>
<div class="citation">
<h2 data-toc-skip>Citation</h2>
<ul class="list-unstyled">
<li><a href="authors.html#citation">Citing arrow</a></li>
</ul>
</div>
<div class="developers">
<h2 data-toc-skip>Developers</h2>
<ul class="list-unstyled">
<li>Neal Richardson <br><small class="roles"> Author </small> </li>
<li>Ian Cook <br><small class="roles"> Author </small> </li>
<li>Nic Crane <br><small class="roles"> Author </small> </li>
<li>Dewey Dunnington <br><small class="roles"> Author </small> <a href="https://orcid.org/0000-0002-9415-4582" target="orcid.widget" aria-label="ORCID" class="external-link"><span class="fab fa-orcid orcid" aria-hidden="true"></span></a> </li>
<li>Romain François <br><small class="roles"> Author </small> <a href="https://orcid.org/0000-0002-2444-4226" target="orcid.widget" aria-label="ORCID" class="external-link"><span class="fab fa-orcid orcid" aria-hidden="true"></span></a> </li>
<li>Jonathan Keane <br><small class="roles"> Author, maintainer </small> </li>
<li>Dragoș Moldovan-Grünfeld <br><small class="roles"> Author </small> </li>
<li>Jeroen Ooms <br><small class="roles"> Author </small> </li>
<li>Jacob Wujciak-Jens <br><small class="roles"> Author </small> </li>
<li>Apache Arrow <br><small class="roles"> Author, copyright holder </small> </li>
<li><a href="authors.html">More about authors...</a></li>
</ul>
</div>
<div class="dev-status">
<h2 data-toc-skip>Dev status</h2>
<ul class="list-unstyled">
<li><a href="https://cran.r-project.org/package=arrow" class="external-link"><img src="https://www.r-pkg.org/badges/version-last-release/arrow" alt="cran"></a></li>
<li><a href="https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amain+event%3Apush" class="external-link"><img src="https://github.com/apache/arrow/workflows/R/badge.svg?event=push" alt="CI"></a></li>
<li><a href="https://anaconda.org/conda-forge/r-arrow" class="external-link"><img src="https://img.shields.io/conda/vn/conda-forge/r-arrow.svg" alt="conda-forge"></a></li>
</ul>
</div>
</aside>
</div>
<footer><div class="pkgdown-footer-left">
<p><a href="https://arrow.apache.org/docs/r/versions.html">Older versions of these docs</a></p>
</div>
<div class="pkgdown-footer-right">
<p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.9.</p>
</div>
</footer>
</div>
</body>
</html>