blob: 199b54d4ef5b180d909122cb05373f3aa54d57de [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="description" content="Learn how to read and write CSV, Parquet, and Feather files with arrow">
<title>Reading and writing data files • Arrow R Package</title>
<!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png">
<script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<link href="../deps/bootstrap-5.2.2/bootstrap.min.css" rel="stylesheet">
<script src="../deps/bootstrap-5.2.2/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
<!-- bootstrap-toc --><script src="https://cdn.jsdelivr.net/gh/afeld/bootstrap-toc@v1.0.1/dist/bootstrap-toc.min.js" integrity="sha256-4veVQbu7//Lk5TSmc7YV48MxtMy98e26cf5MrgZYnwo=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="Reading and writing data files">
<meta property="og:description" content="Learn how to read and write CSV, Parquet, and Feather files with arrow">
<meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png">
<meta property="og:image:alt" content="Apache Arrow logo, displaying the triple chevron image adjacent to the text">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:creator" content="@apachearrow">
<meta name="twitter:site" content="@apachearrow">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Matomo --><script>
/* Matomo analytics bootstrap: queues tracker commands on window._paq and injects the tracker script. */
/* Reuse window._paq if it already exists; otherwise start with an empty command queue. */
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
/* Base URL shared by the tracking endpoint (matomo.php) and the tracker script (matomo.js). */
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
/* d: the document; g: a newly created script element for the tracker; s: the first script element already in the page. */
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
/* Mark the tracker script async, point it at matomo.js, and insert it before the first existing script element. */
/* NOTE(review): presumably matomo.js processes the commands queued in _paq once it loads; confirm against the Matomo docs. */
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script><!-- End Matomo Code -->
</head>
<body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-black"><div class="container">
<a class="navbar-brand me-2" href="../index.html">Arrow R Package</a>
<span class="version">
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">13.0.0</small>
</span>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-3">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link" href="../articles/arrow.html">Get started</a>
</li>
<li class="nav-item">
<a class="nav-link" href="../reference/index.html">Reference</a>
</li>
<li class="active nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<h6 class="dropdown-header" data-toc-skip>Using the package</h6>
<a class="dropdown-item" href="../articles/read_write.html">Reading and writing data files</a>
<a class="dropdown-item" href="../articles/data_wrangling.html">Data analysis with dplyr syntax</a>
<a class="dropdown-item" href="../articles/dataset.html">Working with multi-file data sets</a>
<a class="dropdown-item" href="../articles/python.html">Integrating Arrow, Python, and R</a>
<a class="dropdown-item" href="../articles/fs.html">Using cloud storage (S3, GCS)</a>
<a class="dropdown-item" href="../articles/flight.html">Connecting to a Flight server</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Arrow concepts</h6>
<a class="dropdown-item" href="../articles/data_objects.html">Data objects</a>
<a class="dropdown-item" href="../articles/data_types.html">Data types</a>
<a class="dropdown-item" href="../articles/metadata.html">Metadata</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Installation</h6>
<a class="dropdown-item" href="../articles/install.html">Installing on Linux</a>
<a class="dropdown-item" href="../articles/install_nightly.html">Installing development versions</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="../articles/index.html">More articles...</a>
</div>
</li>
<li class="nav-item">
<a class="nav-link" href="../news/index.html">Changelog</a>
</li>
</ul>
<form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off">
</form>
<ul class="navbar-nav">
<li class="nav-item">
<a class="external-link nav-link" href="https://github.com/apache/arrow/" aria-label="github">
<span class="fab fa fab fa-github fa-lg"></span>
</a>
</li>
</ul>
</div>
</div>
</nav><div class="container template-article">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<img src="" class="logo" alt=""><h1>Reading and writing data files</h1>
<small class="dont-index">Source: <a href="https://github.com/apache/arrow/blob/main/r/vignettes/read_write.Rmd" class="external-link"><code>vignettes/read_write.Rmd</code></a></small>
<div class="d-none name"><code>read_write.Rmd</code></div>
</div>
<p>The arrow package provides functions for reading single data files into memory, in several common formats. By default, calling any of these functions returns an R data frame. To return an Arrow Table, set argument <code>as_data_frame = FALSE</code>.</p>
<ul>
<li>
<code><a href="../reference/read_parquet.html">read_parquet()</a></code>: read a file in Parquet format</li>
<li>
<code><a href="../reference/read_feather.html">read_feather()</a></code>: read a file in the Apache Arrow IPC format (formerly called the Feather format)</li>
<li>
<code><a href="../reference/read_delim_arrow.html">read_delim_arrow()</a></code>: read a delimited text file (default delimiter is comma)</li>
<li>
<code><a href="../reference/read_delim_arrow.html">read_csv_arrow()</a></code>: read a comma-separated values (CSV) file</li>
<li>
<code><a href="../reference/read_delim_arrow.html">read_tsv_arrow()</a></code>: read a tab-separated values (TSV) file</li>
<li>
<code><a href="../reference/read_json_arrow.html">read_json_arrow()</a></code>: read a JSON data file</li>
</ul>
<p>For writing data to single files, the arrow package provides the following functions, which can be used with both R data frames and Arrow Tables:</p>
<ul>
<li>
<code><a href="../reference/write_parquet.html">write_parquet()</a></code>: write a file in Parquet format</li>
<li>
<code><a href="../reference/write_feather.html">write_feather()</a></code>: write a file in Arrow IPC format</li>
<li>
<code><a href="../reference/write_csv_arrow.html">write_csv_arrow()</a></code>: write a file in CSV format</li>
</ul>
<p>All these functions can read and write files in the local filesystem or to cloud storage. For more on cloud storage support in arrow, see the <a href="./fs.html">cloud storage article</a>.</p>
<p>The arrow package also supports reading larger-than-memory single data files, and reading and writing multi-file data sets. This enables analysis and processing of larger-than-memory data, and provides the ability to partition data into smaller chunks without loading the full data into memory. For more information on this topic, see the <a href="./dataset.html">dataset article</a>.</p>
<div class="section level2">
<h2 id="parquet-format">Parquet format<a class="anchor" aria-label="anchor" href="#parquet-format"></a>
</h2>
<p><a href="https://parquet.apache.org/" class="external-link">Apache Parquet</a> is a popular choice for storing analytics data; it is a binary format that is optimized for reduced file sizes and fast read performance, especially for column-based access patterns. The simplest way to read and write Parquet data using arrow is with the <code><a href="../reference/read_parquet.html">read_parquet()</a></code> and <code><a href="../reference/write_parquet.html">write_parquet()</a></code> functions. To illustrate this, we’ll write the <code>starwars</code> data included in dplyr to a Parquet file, then read it back in. First load the arrow and dplyr packages:</p>
<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://github.com/apache/arrow/" class="external-link">arrow</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
<span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://dplyr.tidyverse.org" class="external-link">dplyr</a></span>, warn.conflicts <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span></code></pre></div>
<p>Next we’ll write the data frame to a Parquet file located at <code>file_path</code>:</p>
<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html" class="external-link">tempfile</a></span><span class="op">(</span><span class="op">)</span></span>
<span><span class="fu"><a href="../reference/write_parquet.html">write_parquet</a></span><span class="op">(</span><span class="va">starwars</span>, <span class="va">file_path</span><span class="op">)</span></span></code></pre></div>
<p>The size of a Parquet file is typically much smaller than the corresponding CSV file would have been. This is in part due to the use of file compression: by default, Parquet files written with the arrow package use <a href="https://google.github.io/snappy/" class="external-link">Snappy compression</a> but other options such as gzip are also supported. See <code><a href="../reference/write_parquet.html">help("write_parquet", package = "arrow")</a></code> for more information.</p>
<p>Having written the Parquet file, we now can read it with <code><a href="../reference/read_parquet.html">read_parquet()</a></code>:</p>
<div class="sourceCode" id="cb3"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="../reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## <span style="color: #949494;"># A tibble: 87 x 14</span></span></span>
<span><span class="co">## name height mass hair_color skin_color eye_color birth_year sex gender</span></span>
<span><span class="co">## <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;int&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> </span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 1</span> Luke Sk~ 172 77 blond fair blue 19 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 2</span> C-3PO 167 75 <span style="color: #BB0000;">NA</span> gold yellow 112 none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 3</span> R2-D2 96 32 <span style="color: #BB0000;">NA</span> white, bl~ red 33 none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 4</span> Darth V~ 202 136 none white yellow 41.9 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 5</span> Leia Or~ 150 49 brown light brown 19 fema~ femin~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 6</span> Owen La~ 178 120 brown, gr~ light blue 52 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 7</span> Beru Wh~ 165 75 brown light blue 47 fema~ femin~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 8</span> R5-D4 97 32 <span style="color: #BB0000;">NA</span> white, red red <span style="color: #BB0000;">NA</span> none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 9</span> Biggs D~ 183 84 black light brown 24 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;">10</span> Obi-Wan~ 182 77 auburn, w~ fair blue-gray 57 male mascu~</span></span>
<span><span class="co">## <span style="color: #949494;"># i 77 more rows</span></span></span>
<span><span class="co">## <span style="color: #949494;"># i 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&lt;character&gt;&gt;,</span></span></span>
<span><span class="co">## <span style="color: #949494;"># vehicles &lt;list&lt;character&gt;&gt;, starships &lt;list&lt;character&gt;&gt;</span></span></span></code></pre>
<p>The default is to return a data frame or tibble. If we want an Arrow Table instead, we would set <code>as_data_frame = FALSE</code>:</p>
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="../reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span>, as_data_frame <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## Table</span></span>
<span><span class="co">## 87 rows x 14 columns</span></span>
<span><span class="co">## $name &lt;string&gt;</span></span>
<span><span class="co">## $height &lt;int32&gt;</span></span>
<span><span class="co">## $mass &lt;double&gt;</span></span>
<span><span class="co">## $hair_color &lt;string&gt;</span></span>
<span><span class="co">## $skin_color &lt;string&gt;</span></span>
<span><span class="co">## $eye_color &lt;string&gt;</span></span>
<span><span class="co">## $birth_year &lt;double&gt;</span></span>
<span><span class="co">## $sex &lt;string&gt;</span></span>
<span><span class="co">## $gender &lt;string&gt;</span></span>
<span><span class="co">## $homeworld &lt;string&gt;</span></span>
<span><span class="co">## $species &lt;string&gt;</span></span>
<span><span class="co">## $films: list&lt;element &lt;string&gt;&gt;</span></span>
<span><span class="co">## $vehicles: list&lt;element &lt;string&gt;&gt;</span></span>
<span><span class="co">## $starships: list&lt;element &lt;string&gt;&gt;</span></span></code></pre>
<p>One useful feature of Parquet files is that they store data column-wise, and contain metadata that allow file readers to skip to the relevant sections of the file. That means it is possible to load only a subset of the columns without reading the complete file. The <code>col_select</code> argument to <code><a href="../reference/read_parquet.html">read_parquet()</a></code> supports this functionality:</p>
<div class="sourceCode" id="cb7"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="../reference/read_parquet.html">read_parquet</a></span><span class="op">(</span><span class="va">file_path</span>, col_select <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"name"</span>, <span class="st">"height"</span>, <span class="st">"mass"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## <span style="color: #949494;"># A tibble: 87 x 3</span></span></span>
<span><span class="co">## name height mass</span></span>
<span><span class="co">## <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;int&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span></span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 1</span> Luke Skywalker 172 77</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 2</span> C-3PO 167 75</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 3</span> R2-D2 96 32</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 4</span> Darth Vader 202 136</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 5</span> Leia Organa 150 49</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 6</span> Owen Lars 178 120</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 7</span> Beru Whitesun lars 165 75</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 8</span> R5-D4 97 32</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 9</span> Biggs Darklighter 183 84</span></span>
<span><span class="co">## <span style="color: #BCBCBC;">10</span> Obi-Wan Kenobi 182 77</span></span>
<span><span class="co">## <span style="color: #949494;"># i 77 more rows</span></span></span></code></pre>
<p>Fine-grained control over the Parquet reader is possible with the <code>props</code> argument. See <code><a href="../reference/ParquetArrowReaderProperties.html">help("ParquetArrowReaderProperties", package = "arrow")</a></code> for details.</p>
<p>R object attributes are preserved when writing data to Parquet or Arrow/Feather files and when reading those files back into R. This enables round-trip writing and reading of <code>sf::sf</code> objects, R data frames with <code>haven::labelled</code> columns, and data frames with other custom attributes. To learn more about how metadata are handled in arrow, see the <a href="./metadata.html">metadata article</a>.</p>
</div>
<div class="section level2">
<h2 id="arrowfeather-format">Arrow/Feather format<a class="anchor" aria-label="anchor" href="#arrowfeather-format"></a>
</h2>
<p>The Arrow file format was developed to provide binary columnar serialization for data frames, to make reading and writing data frames efficient, and to make sharing data across data analysis languages easy. This file format is sometimes referred to as Feather because it is an outgrowth of the original <a href="https://github.com/wesm/feather" class="external-link">Feather</a> project that has now been moved into the Arrow project itself. You can find the detailed specification of version 2 of the Arrow format – officially referred to as <a href="https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format" class="external-link">the Arrow IPC file format</a> – on the Arrow specification page.</p>
<p>The <code><a href="../reference/write_feather.html">write_feather()</a></code> function writes version 2 Arrow/Feather files by default, and supports multiple kinds of file compression. Basic use is shown below:</p>
<div class="sourceCode" id="cb9"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html" class="external-link">tempfile</a></span><span class="op">(</span><span class="op">)</span></span>
<span><span class="fu"><a href="../reference/write_feather.html">write_feather</a></span><span class="op">(</span><span class="va">starwars</span>, <span class="va">file_path</span><span class="op">)</span></span></code></pre></div>
<p>The <code><a href="../reference/read_feather.html">read_feather()</a></code> function provides a familiar interface for reading feather files:</p>
<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="../reference/read_feather.html">read_feather</a></span><span class="op">(</span><span class="va">file_path</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## <span style="color: #949494;"># A tibble: 87 x 14</span></span></span>
<span><span class="co">## name height mass hair_color skin_color eye_color birth_year sex gender</span></span>
<span><span class="co">## <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;int&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span> </span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 1</span> Luke Sk~ 172 77 blond fair blue 19 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 2</span> C-3PO 167 75 <span style="color: #BB0000;">NA</span> gold yellow 112 none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 3</span> R2-D2 96 32 <span style="color: #BB0000;">NA</span> white, bl~ red 33 none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 4</span> Darth V~ 202 136 none white yellow 41.9 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 5</span> Leia Or~ 150 49 brown light brown 19 fema~ femin~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 6</span> Owen La~ 178 120 brown, gr~ light blue 52 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 7</span> Beru Wh~ 165 75 brown light blue 47 fema~ femin~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 8</span> R5-D4 97 32 <span style="color: #BB0000;">NA</span> white, red red <span style="color: #BB0000;">NA</span> none mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 9</span> Biggs D~ 183 84 black light brown 24 male mascu~</span></span>
<span><span class="co">## <span style="color: #BCBCBC;">10</span> Obi-Wan~ 182 77 auburn, w~ fair blue-gray 57 male mascu~</span></span>
<span><span class="co">## <span style="color: #949494;"># i 77 more rows</span></span></span>
<span><span class="co">## <span style="color: #949494;"># i 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&lt;character&gt;&gt;,</span></span></span>
<span><span class="co">## <span style="color: #949494;"># vehicles &lt;list&lt;character&gt;&gt;, starships &lt;list&lt;character&gt;&gt;</span></span></span></code></pre>
<p>Like the Parquet reader, this reader supports reading only a subset of columns, and can produce Arrow Table output:</p>
<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="fu"><a href="../reference/read_feather.html">read_feather</a></span><span class="op">(</span></span>
<span> file <span class="op">=</span> <span class="va">file_path</span>,</span>
<span> col_select <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"name"</span>, <span class="st">"height"</span>, <span class="st">"mass"</span><span class="op">)</span>,</span>
<span> as_data_frame <span class="op">=</span> <span class="cn">FALSE</span></span>
<span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## Table</span></span>
<span><span class="co">## 87 rows x 3 columns</span></span>
<span><span class="co">## $name &lt;string&gt;</span></span>
<span><span class="co">## $height &lt;int32&gt;</span></span>
<span><span class="co">## $mass &lt;double&gt;</span></span></code></pre>
</div>
<div class="section level2">
<h2 id="csv-format">CSV format<a class="anchor" aria-label="anchor" href="#csv-format"></a>
</h2>
<p>The read/write capabilities of the arrow package also include support for CSV and other text-delimited files. The <code><a href="../reference/read_delim_arrow.html">read_csv_arrow()</a></code>, <code><a href="../reference/read_delim_arrow.html">read_tsv_arrow()</a></code>, and <code><a href="../reference/read_delim_arrow.html">read_delim_arrow()</a></code> functions all use the Arrow C++ CSV reader to read data files, where the Arrow C++ options have been mapped to arguments in a way that mirrors the conventions used in <code>readr::read_delim()</code>, with a <code>col_select</code> argument inspired by <code>vroom::vroom()</code>.</p>
<p>A simple example of writing and reading a CSV file with arrow is shown below:</p>
<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html" class="external-link">tempfile</a></span><span class="op">(</span><span class="op">)</span></span>
<span><span class="fu"><a href="../reference/write_csv_arrow.html">write_csv_arrow</a></span><span class="op">(</span><span class="va">mtcars</span>, <span class="va">file_path</span><span class="op">)</span></span>
<span><span class="fu"><a href="../reference/read_delim_arrow.html">read_csv_arrow</a></span><span class="op">(</span><span class="va">file_path</span>, col_select <span class="op">=</span> <span class="fu"><a href="https://tidyselect.r-lib.org/reference/starts_with.html" class="external-link">starts_with</a></span><span class="op">(</span><span class="st">"d"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## <span style="color: #949494;"># A tibble: 32 x 2</span></span></span>
<span><span class="co">## disp drat</span></span>
<span><span class="co">## <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span></span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 1</span> 160 3.9 </span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 2</span> 160 3.9 </span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 3</span> 108 3.85</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 4</span> 258 3.08</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 5</span> 360 3.15</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 6</span> 225 2.76</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 7</span> 360 3.21</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 8</span> 147. 3.69</span></span>
<span><span class="co">## <span style="color: #BCBCBC;"> 9</span> 141. 3.92</span></span>
<span><span class="co">## <span style="color: #BCBCBC;">10</span> 168. 3.92</span></span>
<span><span class="co">## <span style="color: #949494;"># i 22 more rows</span></span></span></code></pre>
<p>In addition to the options provided by the readr-style arguments (<code>delim</code>, <code>quote</code>, <code>escape_double</code>, <code>escape_backslash</code>, etc.), you can use the <code>schema</code> argument to specify column types: see <code><a href="../reference/schema.html">schema()</a></code> help for details. There is also the option of using <code>parse_options</code>, <code>convert_options</code>, and <code>read_options</code> to exercise fine-grained control over the Arrow CSV reader: see <code><a href="../reference/CsvReadOptions.html">help("CsvReadOptions", package = "arrow")</a></code> for details.</p>
</div>
<div class="section level2">
<h2 id="json-format">JSON format<a class="anchor" aria-label="anchor" href="#json-format"></a>
</h2>
<p>The arrow package supports reading (but not writing) of tabular data from line-delimited JSON, using the <code><a href="../reference/read_json_arrow.html">read_json_arrow()</a></code> function. A minimal example is shown below:</p>
<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span><span class="va">file_path</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/tempfile.html" class="external-link">tempfile</a></span><span class="op">(</span><span class="op">)</span></span>
<span><span class="fu"><a href="https://rdrr.io/r/base/writeLines.html" class="external-link">writeLines</a></span><span class="op">(</span><span class="st">'</span></span>
<span><span class="st"> { "hello": 3.5, "world": false, "yo": "thing" }</span></span>
<span><span class="st"> { "hello": 3.25, "world": null }</span></span>
<span><span class="st"> { "hello": 0.0, "world": true, "yo": null }</span></span>
<span><span class="st"> '</span>, <span class="va">file_path</span>, useBytes <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span>
<span><span class="fu"><a href="../reference/read_json_arrow.html">read_json_arrow</a></span><span class="op">(</span><span class="va">file_path</span><span class="op">)</span></span></code></pre></div>
<pre><code><span><span class="co">## <span style="color: #949494;"># A tibble: 3 x 3</span></span></span>
<span><span class="co">## hello world yo </span></span>
<span><span class="co">## <span style="color: #949494; font-style: italic;">&lt;dbl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;lgl&gt;</span> <span style="color: #949494; font-style: italic;">&lt;chr&gt;</span></span></span>
<span><span class="co">## <span style="color: #BCBCBC;">1</span> 3.5 FALSE thing</span></span>
<span><span class="co">## <span style="color: #BCBCBC;">2</span> 3.25 <span style="color: #BB0000;">NA</span> <span style="color: #BB0000;">NA</span> </span></span>
<span><span class="co">## <span style="color: #BCBCBC;">3</span> 0 TRUE <span style="color: #BB0000;">NA</span></span></span></code></pre>
</div>
<div class="section level2">
<h2 id="further-reading">Further reading<a class="anchor" aria-label="anchor" href="#further-reading"></a>
</h2>
<ul>
<li>To learn more about cloud storage, see the <a href="./fs.html">cloud storage article</a>.</li>
<li>To learn more about multi-file datasets, see the <a href="./dataset.html">datasets article</a>.</li>
<li>The Apache Arrow R cookbook has chapters on <a href="https://arrow.apache.org/cookbook/r/reading-and-writing-data---single-files.html" class="external-link">reading and writing single files</a> into memory and working with <a href="https://arrow.apache.org/cookbook/r/reading-and-writing-data---multiple-files.html" class="external-link">multi-file datasets</a> stored on-disk.</li>
</ul>
</div>
</main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2>
</nav></aside>
</div>
<footer><div class="pkgdown-footer-left">
<p></p>
<p><a href="https://arrow.apache.org/docs/r/versions.html">Older versions of these docs</a></p>
</div>
<div class="pkgdown-footer-right">
<p></p>
<p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
</div>
</footer>
</div>
</body>
</html>