blob: 4dff08679e26af15fc86b68ad2a2110bc743317a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook</title>
<meta name="description" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" />
<meta name="generator" content="bookdown 0.38 and GitBook 2.6.7" />
<meta property="og:title" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" />
<meta property="og:type" content="book" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<link rel="prev" href="index.html"/>
<link rel="next" href="reading-and-writing-data---multiple-files.html"/>
<script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/fuse.js@6.4.6/dist/fuse.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
<link href="libs/anchor-sections-1.1.0/anchor-sections.css" rel="stylesheet" />
<link href="libs/anchor-sections-1.1.0/anchor-sections-hash.css" rel="stylesheet" />
<script src="libs/anchor-sections-1.1.0/anchor-sections.js"></script>
<style type="text/css">
/* Layout and syntax-highlighting styles for code blocks marked with the
   sourceCode class. Rule order matters for the cascade; keep it as is. */
/* Preserve whitespace exactly as written inside highlighted code. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
/* Give empty spans an explicit height. */
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
/* Spans take color and text decoration from their parent unless a token
   rule further down sets its own. */
code.sourceCode > span { color: inherit; text-decoration: inherit; }
pre.sourceCode { margin: 0; }
/* On screen, the wrapping div scrolls when its content overflows. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, let long lines wrap; the negative text-indent paired with an
   equal padding-left gives wrapped continuation lines a hanging indent. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbering for pre.numberSource: the source-line counter is reset on
   the code element and incremented once per child span (presumably one span
   per source line; confirm against the generator's markup). */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
/* The number itself is generated content placed before the first anchor of
   each span; user-select: none and its vendor-prefixed variants make it
   non-selectable. */
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
/* Left margin and rule for numbered code blocks. */
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Empty rule: declares nothing. */
div.sourceCode
{ }
/* On screen, underline the generated ::before content of each span's first anchor. */
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Syntax-highlighting token styles; the trailing comment on each rule names
   the token category that the two-letter class stands for. */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<style type="text/css">
/* Hanging indent: the block is shifted right by 1.5em while its first line
   is pulled back by the same amount, so only continuation lines appear
   indented. */
div.hanging-indent {
  margin-left: 1.5em;
  text-indent: -1.5em;
}
</style>
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Preface</a>
<ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#what-is-arrow"><i class="fa fa-check"></i><b>1.1</b> What is Arrow?</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#alternative-resources"><i class="fa fa-check"></i><b>1.2</b> Alternative resources</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html"><i class="fa fa-check"></i><b>2</b> Reading and Writing Data - Single Files</a>
<ul>
<li class="chapter" data-level="2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#introduction"><i class="fa fa-check"></i><b>2.1</b> Introduction</a></li>
<li class="chapter" data-level="2.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-a-data-frame-to-an-arrow-table"><i class="fa fa-check"></i><b>2.2</b> Convert data from a data frame to an Arrow Table</a>
<ul>
<li class="chapter" data-level="2.2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution"><i class="fa fa-check"></i><b>2.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.3" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-an-arrow-table-to-a-data-frame"><i class="fa fa-check"></i><b>2.3</b> Convert data from an Arrow Table to a data frame</a>
<ul>
<li class="chapter" data-level="2.3.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-1"><i class="fa fa-check"></i><b>2.3.1</b> Solution</a></li>
<li class="chapter" data-level="2.3.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion"><i class="fa fa-check"></i><b>2.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.4" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-parquet-file"><i class="fa fa-check"></i><b>2.4</b> Write a Parquet file</a>
<ul>
<li class="chapter" data-level="2.4.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-2"><i class="fa fa-check"></i><b>2.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.5" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file"><i class="fa fa-check"></i><b>2.5</b> Read a Parquet file</a>
<ul>
<li class="chapter" data-level="2.5.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-3"><i class="fa fa-check"></i><b>2.5.1</b> Solution</a></li>
<li class="chapter" data-level="2.5.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-1"><i class="fa fa-check"></i><b>2.5.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.6" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file-from-s3"><i class="fa fa-check"></i><b>2.6</b> Read a Parquet file from S3</a>
<ul>
<li class="chapter" data-level="2.6.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-4"><i class="fa fa-check"></i><b>2.6.1</b> Solution</a></li>
<li class="chapter" data-level="2.6.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also"><i class="fa fa-check"></i><b>2.6.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="2.7" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#filter-columns-while-reading-a-parquet-file"><i class="fa fa-check"></i><b>2.7</b> Filter columns while reading a Parquet file</a>
<ul>
<li class="chapter" data-level="2.7.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-5"><i class="fa fa-check"></i><b>2.7.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.8" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-feather-v2arrow-ipc-file"><i class="fa fa-check"></i><b>2.8</b> Write a Feather V2/Arrow IPC file</a>
<ul>
<li class="chapter" data-level="2.8.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-6"><i class="fa fa-check"></i><b>2.8.1</b> Solution</a></li>
<li class="chapter" data-level="2.8.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-2"><i class="fa fa-check"></i><b>2.8.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.9" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-featherarrow-ipc-file"><i class="fa fa-check"></i><b>2.9</b> Read a Feather/Arrow IPC file</a>
<ul>
<li class="chapter" data-level="2.9.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-7"><i class="fa fa-check"></i><b>2.9.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.10" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.10</b> Write streaming Arrow IPC files</a>
<ul>
<li class="chapter" data-level="2.10.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-8"><i class="fa fa-check"></i><b>2.10.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.11" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.11</b> Read streaming Arrow IPC files</a>
<ul>
<li class="chapter" data-level="2.11.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-9"><i class="fa fa-check"></i><b>2.11.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.12" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-csv-file"><i class="fa fa-check"></i><b>2.12</b> Write a CSV file</a>
<ul>
<li class="chapter" data-level="2.12.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-10"><i class="fa fa-check"></i><b>2.12.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.13" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-csv-file"><i class="fa fa-check"></i><b>2.13</b> Read a CSV file</a>
<ul>
<li class="chapter" data-level="2.13.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-11"><i class="fa fa-check"></i><b>2.13.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.14" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-json-file"><i class="fa fa-check"></i><b>2.14</b> Read a JSON file</a>
<ul>
<li class="chapter" data-level="2.14.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-12"><i class="fa fa-check"></i><b>2.14.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.15" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-compressed-single-data-file"><i class="fa fa-check"></i><b>2.15</b> Write a compressed single data file</a>
<ul>
<li class="chapter" data-level="2.15.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-13"><i class="fa fa-check"></i><b>2.15.1</b> Solution</a></li>
<li class="chapter" data-level="2.15.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also-1"><i class="fa fa-check"></i><b>2.15.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="2.16" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-compressed-data"><i class="fa fa-check"></i><b>2.16</b> Read compressed data</a>
<ul>
<li class="chapter" data-level="2.16.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-14"><i class="fa fa-check"></i><b>2.16.1</b> Solution</a></li>
<li class="chapter" data-level="2.16.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-3"><i class="fa fa-check"></i><b>2.16.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html"><i class="fa fa-check"></i><b>3</b> Reading and Writing Data - Multiple Files</a>
<ul>
<li class="chapter" data-level="3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#introduction-1"><i class="fa fa-check"></i><b>3.1</b> Introduction</a></li>
<li class="chapter" data-level="3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---parquet"><i class="fa fa-check"></i><b>3.2</b> Write data to disk - Parquet</a>
<ul>
<li class="chapter" data-level="3.2.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-15"><i class="fa fa-check"></i><b>3.2.1</b> Solution</a></li>
<li class="chapter" data-level="3.2.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-4"><i class="fa fa-check"></i><b>3.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-partitioned-data---parquet"><i class="fa fa-check"></i><b>3.3</b> Write partitioned data - Parquet</a>
<ul>
<li class="chapter" data-level="3.3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-16"><i class="fa fa-check"></i><b>3.3.1</b> Solution</a></li>
<li class="chapter" data-level="3.3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-5"><i class="fa fa-check"></i><b>3.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-partitioned-data"><i class="fa fa-check"></i><b>3.4</b> Read partitioned data</a>
<ul>
<li class="chapter" data-level="3.4.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-17"><i class="fa fa-check"></i><b>3.4.1</b> Solution</a></li>
<li class="chapter" data-level="3.4.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-6"><i class="fa fa-check"></i><b>3.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---featherarrow-ipc-format"><i class="fa fa-check"></i><b>3.5</b> Write data to disk - Feather/Arrow IPC format</a>
<ul>
<li class="chapter" data-level="3.5.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-18"><i class="fa fa-check"></i><b>3.5.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.6" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-featherarrow-ipc-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.6</b> Read in Feather/Arrow IPC data as an Arrow Dataset</a>
<ul>
<li class="chapter" data-level="3.6.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-19"><i class="fa fa-check"></i><b>3.6.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.7" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---csv-format"><i class="fa fa-check"></i><b>3.7</b> Write data to disk - CSV format</a>
<ul>
<li class="chapter" data-level="3.7.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-20"><i class="fa fa-check"></i><b>3.7.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.8" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-csv-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.8</b> Read in CSV data as an Arrow Dataset</a>
<ul>
<li class="chapter" data-level="3.8.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-21"><i class="fa fa-check"></i><b>3.8.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.9" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-a-csv-dataset-no-headers"><i class="fa fa-check"></i><b>3.9</b> Read in a CSV dataset (no headers)</a>
<ul>
<li class="chapter" data-level="3.9.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-22"><i class="fa fa-check"></i><b>3.9.1</b> Solution</a></li>
<li class="chapter" data-level="3.9.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-7"><i class="fa fa-check"></i><b>3.9.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.10" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-compressed-partitioned-data"><i class="fa fa-check"></i><b>3.10</b> Write compressed partitioned data</a>
<ul>
<li class="chapter" data-level="3.10.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-23"><i class="fa fa-check"></i><b>3.10.1</b> Solution</a></li>
<li class="chapter" data-level="3.10.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-8"><i class="fa fa-check"></i><b>3.10.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.11" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-compressed-data-1"><i class="fa fa-check"></i><b>3.11</b> Read compressed data</a>
<ul>
<li class="chapter" data-level="3.11.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-24"><i class="fa fa-check"></i><b>3.11.1</b> Solution</a></li>
<li class="chapter" data-level="3.11.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-9"><i class="fa fa-check"></i><b>3.11.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html"><i class="fa fa-check"></i><b>4</b> Creating Arrow Objects</a>
<ul>
<li class="chapter" data-level="4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-an-arrow-array-from-an-r-object"><i class="fa fa-check"></i><b>4.1</b> Create an Arrow Array from an R object</a>
<ul>
<li class="chapter" data-level="4.1.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-25"><i class="fa fa-check"></i><b>4.1.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.2" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-a-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>4.2</b> Create an Arrow Table from an R object</a>
<ul>
<li class="chapter" data-level="4.2.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-26"><i class="fa fa-check"></i><b>4.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.3" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#view-the-contents-of-an-arrow-table-or-recordbatch"><i class="fa fa-check"></i><b>4.3</b> View the contents of an Arrow Table or RecordBatch</a>
<ul>
<li class="chapter" data-level="4.3.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-27"><i class="fa fa-check"></i><b>4.3.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#manually-create-a-recordbatch-from-an-r-object."><i class="fa fa-check"></i><b>4.4</b> Manually create a RecordBatch from an R object.</a>
<ul>
<li class="chapter" data-level="4.4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-28"><i class="fa fa-check"></i><b>4.4.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="defining-data-types.html"><a href="defining-data-types.html"><i class="fa fa-check"></i><b>5</b> Defining Data Types</a>
<ul>
<li class="chapter" data-level="5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#introduction-2"><i class="fa fa-check"></i><b>5.1</b> Introduction</a></li>
<li class="chapter" data-level="5.2" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-an-existing-arrow-array"><i class="fa fa-check"></i><b>5.2</b> Update data type of an existing Arrow Array</a>
<ul>
<li class="chapter" data-level="5.2.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-29"><i class="fa fa-check"></i><b>5.2.1</b> Solution</a></li>
<li class="chapter" data-level="5.2.2" data-path="defining-data-types.html"><a href="defining-data-types.html#discussion-10"><i class="fa fa-check"></i><b>5.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="5.3" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-a-field-in-an-existing-arrow-table"><i class="fa fa-check"></i><b>5.3</b> Update data type of a field in an existing Arrow Table</a>
<ul>
<li class="chapter" data-level="5.3.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-30"><i class="fa fa-check"></i><b>5.3.1</b> Solution</a></li>
<li class="chapter" data-level="5.3.2" data-path="defining-data-types.html"><a href="defining-data-types.html#no-compat-type"><i class="fa fa-check"></i><b>5.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-creating-an-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>5.4</b> Specify data types when creating an Arrow table from an R object</a>
<ul>
<li class="chapter" data-level="5.4.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-31"><i class="fa fa-check"></i><b>5.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="5.5" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-reading-in-files"><i class="fa fa-check"></i><b>5.5</b> Specify data types when reading in files</a>
<ul>
<li class="chapter" data-level="5.5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-32"><i class="fa fa-check"></i><b>5.5.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html"><i class="fa fa-check"></i><b>6</b> Manipulating Data - Arrays</a>
<ul>
<li class="chapter" data-level="6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#introduction-3"><i class="fa fa-check"></i><b>6.1</b> Introduction</a></li>
<li class="chapter" data-level="6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#filter-by-values-matching-a-predicate-or-mask"><i class="fa fa-check"></i><b>6.2</b> Filter by values matching a predicate or mask</a>
<ul>
<li class="chapter" data-level="6.2.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-33"><i class="fa fa-check"></i><b>6.2.1</b> Solution</a></li>
<li class="chapter" data-level="6.2.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-11"><i class="fa fa-check"></i><b>6.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#compute-meanminmax-etc-value-of-an-array"><i class="fa fa-check"></i><b>6.3</b> Compute Mean/Min/Max, etc value of an Array</a>
<ul>
<li class="chapter" data-level="6.3.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-34"><i class="fa fa-check"></i><b>6.3.1</b> Solution</a></li>
<li class="chapter" data-level="6.3.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-12"><i class="fa fa-check"></i><b>6.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.4" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#count-occurrences-of-elements-in-an-array"><i class="fa fa-check"></i><b>6.4</b> Count occurrences of elements in an Array</a>
<ul>
<li class="chapter" data-level="6.4.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-35"><i class="fa fa-check"></i><b>6.4.1</b> Solution</a></li>
<li class="chapter" data-level="6.4.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-13"><i class="fa fa-check"></i><b>6.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.5" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#apply-arithmetic-functions-to-arrays."><i class="fa fa-check"></i><b>6.5</b> Apply arithmetic functions to Arrays.</a>
<ul>
<li class="chapter" data-level="6.5.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-36"><i class="fa fa-check"></i><b>6.5.1</b> Solution</a></li>
<li class="chapter" data-level="6.5.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-14"><i class="fa fa-check"></i><b>6.5.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#call-arrow-compute-functions-directly-on-arrays"><i class="fa fa-check"></i><b>6.6</b> Call Arrow compute functions directly on Arrays</a>
<ul>
<li class="chapter" data-level="6.6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-37"><i class="fa fa-check"></i><b>6.6.1</b> Solution</a></li>
<li class="chapter" data-level="6.6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-15"><i class="fa fa-check"></i><b>6.6.2</b> Discussion</a></li>
<li class="chapter" data-level="6.6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#see-also-2"><i class="fa fa-check"></i><b>6.6.3</b> See also</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="7" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html"><i class="fa fa-check"></i><b>7</b> Manipulating Data - Tables</a>
<ul>
<li class="chapter" data-level="7.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#introduction-4"><i class="fa fa-check"></i><b>7.1</b> Introduction</a></li>
<li class="chapter" data-level="7.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.2</b> Use dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.2.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-38"><i class="fa fa-check"></i><b>7.2.1</b> Solution</a></li>
<li class="chapter" data-level="7.2.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-16"><i class="fa fa-check"></i><b>7.2.2</b> Discussion</a></li>
<li class="chapter" data-level="7.2.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#see-also-3"><i class="fa fa-check"></i><b>7.2.3</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="7.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-r-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.3</b> Use R functions in dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.3.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-39"><i class="fa fa-check"></i><b>7.3.1</b> Solution</a></li>
<li class="chapter" data-level="7.3.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-17"><i class="fa fa-check"></i><b>7.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="7.4" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-arrow-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.4</b> Use Arrow functions in dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.4.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-40"><i class="fa fa-check"></i><b>7.4.1</b> Solution</a></li>
<li class="chapter" data-level="7.4.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-18"><i class="fa fa-check"></i><b>7.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="7.5" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#compute-window-aggregates"><i class="fa fa-check"></i><b>7.5</b> Compute Window Aggregates</a>
<ul>
<li class="chapter" data-level="7.5.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-41"><i class="fa fa-check"></i><b>7.5.1</b> Solution</a></li>
<li class="chapter" data-level="7.5.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discusson"><i class="fa fa-check"></i><b>7.5.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="8" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html"><i class="fa fa-check"></i><b>8</b> Using PyArrow from R</a>
<ul>
<li class="chapter" data-level="8.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#introduction-5"><i class="fa fa-check"></i><b>8.1</b> Introduction</a></li>
<li class="chapter" data-level="8.2" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#create-an-arrow-object-using-pyarrow-in-r"><i class="fa fa-check"></i><b>8.2</b> Create an Arrow object using PyArrow in R</a>
<ul>
<li class="chapter" data-level="8.2.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-42"><i class="fa fa-check"></i><b>8.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="8.3" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#call-a-pyarrow-function-from-r"><i class="fa fa-check"></i><b>8.3</b> Call a PyArrow function from R</a>
<ul>
<li class="chapter" data-level="8.3.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-43"><i class="fa fa-check"></i><b>8.3.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="9" data-path="flight.html"><a href="flight.html"><i class="fa fa-check"></i><b>9</b> Flight</a>
<ul>
<li class="chapter" data-level="9.1" data-path="flight.html"><a href="flight.html#introduction-6"><i class="fa fa-check"></i><b>9.1</b> Introduction</a></li>
<li class="chapter" data-level="9.2" data-path="flight.html"><a href="flight.html#connect-to-a-flight-server"><i class="fa fa-check"></i><b>9.2</b> Connect to a Flight server</a>
<ul>
<li class="chapter" data-level="9.2.1" data-path="flight.html"><a href="flight.html#solution-44"><i class="fa fa-check"></i><b>9.2.1</b> Solution</a></li>
<li class="chapter" data-level="9.2.2" data-path="flight.html"><a href="flight.html#see-also-4"><i class="fa fa-check"></i><b>9.2.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="9.3" data-path="flight.html"><a href="flight.html#send-data-to-a-flight-server"><i class="fa fa-check"></i><b>9.3</b> Send data to a Flight server</a>
<ul>
<li class="chapter" data-level="9.3.1" data-path="flight.html"><a href="flight.html#solution-45"><i class="fa fa-check"></i><b>9.3.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="9.4" data-path="flight.html"><a href="flight.html#check-what-resources-exist-on-a-flight-server"><i class="fa fa-check"></i><b>9.4</b> Check what resources exist on a Flight server</a>
<ul>
<li class="chapter" data-level="9.4.1" data-path="flight.html"><a href="flight.html#solution-46"><i class="fa fa-check"></i><b>9.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="9.5" data-path="flight.html"><a href="flight.html#retrieve-data-from-a-flight-server"><i class="fa fa-check"></i><b>9.5</b> Retrieve data from a Flight server</a>
<ul>
<li class="chapter" data-level="9.5.1" data-path="flight.html"><a href="flight.html#solution-47"><i class="fa fa-check"></i><b>9.5.1</b> Solution</a></li>
</ul></li>
</ul></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Apache Arrow R Cookbook</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div id="reading-and-writing-data---single-files" class="section level1 hasAnchor" number="2">
<h1><span class="header-section-number">2</span> Reading and Writing Data - Single Files<a href="reading-and-writing-data---single-files.html#reading-and-writing-data---single-files" class="anchor-section" aria-label="Anchor link to header"></a></h1>
<div id="introduction" class="section level2 hasAnchor" number="2.1">
<h2><span class="header-section-number">2.1</span> Introduction<a href="reading-and-writing-data---single-files.html#introduction" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>When reading files into R using Apache Arrow, you can read:</p>
<ul>
<li>a single file into memory as a data frame or an Arrow Table</li>
<li>a single file that is too large to fit in memory as an Arrow Dataset</li>
<li>multiple and partitioned files as an Arrow Dataset</li>
</ul>
<p>This chapter contains recipes related to using Apache Arrow to read and
write single file data into memory as an Arrow Table. There are a number of circumstances in
which you may want to read in single file data as an Arrow Table:</p>
<ul>
<li>your data file is large and you are having performance issues</li>
<li>you want faster performance from your <code>dplyr</code> queries</li>
<li>you want to be able to take advantage of Arrow’s compute functions</li>
</ul>
<p>If a single data file is too large to load into memory, you can use the Arrow Dataset API.
Recipes for using <code>open_dataset()</code> and <code>write_dataset()</code> are in the Reading and Writing Data - Multiple Files
chapter.</p>
</div>
<div id="convert-data-from-a-data-frame-to-an-arrow-table" class="section level2 hasAnchor" number="2.2">
<h2><span class="header-section-number">2.2</span> Convert data from a data frame to an Arrow Table<a href="reading-and-writing-data---single-files.html#convert-data-from-a-data-frame-to-an-arrow-table" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to convert an existing <code>data.frame</code> or <code>tibble</code> object into an Arrow Table.</p>
<div id="solution" class="section level3 hasAnchor" number="2.2.1">
<h3><span class="header-section-number">2.2.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="reading-and-writing-data---single-files.html#cb1-1" tabindex="-1"></a>air_table <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(airquality)</span>
<span id="cb1-2"><a href="reading-and-writing-data---single-files.html#cb1-2" tabindex="-1"></a>air_table</span></code></pre></div>
<pre><code>## Table
## 153 rows x 6 columns
## $Ozone &lt;int32&gt;
## $Solar.R &lt;int32&gt;
## $Wind &lt;double&gt;
## $Temp &lt;int32&gt;
## $Month &lt;int32&gt;
## $Day &lt;int32&gt;
##
## See $metadata for additional Schema metadata</code></pre>
</div>
</div>
<div id="convert-data-from-an-arrow-table-to-a-data-frame" class="section level2 hasAnchor" number="2.3">
<h2><span class="header-section-number">2.3</span> Convert data from an Arrow Table to a data frame<a href="reading-and-writing-data---single-files.html#convert-data-from-an-arrow-table-to-a-data-frame" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to convert an Arrow Table to a data frame to view the data or work with it
in your usual analytics pipeline.</p>
<div id="solution-1" class="section level3 hasAnchor" number="2.3.1">
<h3><span class="header-section-number">2.3.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-1" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="reading-and-writing-data---single-files.html#cb3-1" tabindex="-1"></a>air_df <span class="ot">&lt;-</span> <span class="fu">as.data.frame</span>(air_table)</span>
<span id="cb3-2"><a href="reading-and-writing-data---single-files.html#cb3-2" tabindex="-1"></a>air_df</span></code></pre></div>
<pre><code>## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## 11 7 NA 6.9 74 5 11
## 12 16 256 9.7 69 5 12
## 13 11 290 9.2 66 5 13
## 14 14 274 10.9 68 5 14
## 15 18 65 13.2 58 5 15
## 16 14 334 11.5 64 5 16
## 17 34 307 12.0 66 5 17
## 18 6 78 18.4 57 5 18
## 19 30 322 11.5 68 5 19
## 20 11 44 9.7 62 5 20
## 21 1 8 9.7 59 5 21
## 22 11 320 16.6 73 5 22
## 23 4 25 9.7 61 5 23
## 24 32 92 12.0 61 5 24
## 25 NA 66 16.6 57 5 25
## 26 NA 266 14.9 58 5 26
## 27 NA NA 8.0 57 5 27
## 28 23 13 12.0 67 5 28
## 29 45 252 14.9 81 5 29
## 30 115 223 5.7 79 5 30
## 31 37 279 7.4 76 5 31
## 32 NA 286 8.6 78 6 1
## 33 NA 287 9.7 74 6 2
## 34 NA 242 16.1 67 6 3
## 35 NA 186 9.2 84 6 4
## 36 NA 220 8.6 85 6 5
## 37 NA 264 14.3 79 6 6
## 38 29 127 9.7 82 6 7
## 39 NA 273 6.9 87 6 8
## 40 71 291 13.8 90 6 9
## 41 39 323 11.5 87 6 10
## 42 NA 259 10.9 93 6 11
## 43 NA 250 9.2 92 6 12
## 44 23 148 8.0 82 6 13
## 45 NA 332 13.8 80 6 14
## 46 NA 322 11.5 79 6 15
## 47 21 191 14.9 77 6 16
## 48 37 284 20.7 72 6 17
## 49 20 37 9.2 65 6 18
## 50 12 120 11.5 73 6 19
## 51 13 137 10.3 76 6 20
## 52 NA 150 6.3 77 6 21
## 53 NA 59 1.7 76 6 22
## 54 NA 91 4.6 76 6 23
## 55 NA 250 6.3 76 6 24
## 56 NA 135 8.0 75 6 25
## 57 NA 127 8.0 78 6 26
## 58 NA 47 10.3 73 6 27
## 59 NA 98 11.5 80 6 28
## 60 NA 31 14.9 77 6 29
## 61 NA 138 8.0 83 6 30
## 62 135 269 4.1 84 7 1
## 63 49 248 9.2 85 7 2
## 64 32 236 9.2 81 7 3
## 65 NA 101 10.9 84 7 4
## 66 64 175 4.6 83 7 5
## 67 40 314 10.9 83 7 6
## 68 77 276 5.1 88 7 7
## 69 97 267 6.3 92 7 8
## 70 97 272 5.7 92 7 9
## 71 85 175 7.4 89 7 10
## 72 NA 139 8.6 82 7 11
## 73 10 264 14.3 73 7 12
## 74 27 175 14.9 81 7 13
## 75 NA 291 14.9 91 7 14
## 76 7 48 14.3 80 7 15
## 77 48 260 6.9 81 7 16
## 78 35 274 10.3 82 7 17
## 79 61 285 6.3 84 7 18
## 80 79 187 5.1 87 7 19
## 81 63 220 11.5 85 7 20
## 82 16 7 6.9 74 7 21
## 83 NA 258 9.7 81 7 22
## 84 NA 295 11.5 82 7 23
## 85 80 294 8.6 86 7 24
## 86 108 223 8.0 85 7 25
## 87 20 81 8.6 82 7 26
## 88 52 82 12.0 86 7 27
## 89 82 213 7.4 88 7 28
## 90 50 275 7.4 86 7 29
## 91 64 253 7.4 83 7 30
## 92 59 254 9.2 81 7 31
## 93 39 83 6.9 81 8 1
## 94 9 24 13.8 81 8 2
## 95 16 77 7.4 82 8 3
## 96 78 NA 6.9 86 8 4
## 97 35 NA 7.4 85 8 5
## 98 66 NA 4.6 87 8 6
## 99 122 255 4.0 89 8 7
## 100 89 229 10.3 90 8 8
## 101 110 207 8.0 90 8 9
## 102 NA 222 8.6 92 8 10
## 103 NA 137 11.5 86 8 11
## 104 44 192 11.5 86 8 12
## 105 28 273 11.5 82 8 13
## 106 65 157 9.7 80 8 14
## 107 NA 64 11.5 79 8 15
## 108 22 71 10.3 77 8 16
## 109 59 51 6.3 79 8 17
## 110 23 115 7.4 76 8 18
## 111 31 244 10.9 78 8 19
## 112 44 190 10.3 78 8 20
## 113 21 259 15.5 77 8 21
## 114 9 36 14.3 72 8 22
## 115 NA 255 12.6 75 8 23
## 116 45 212 9.7 79 8 24
## 117 168 238 3.4 81 8 25
## 118 73 215 8.0 86 8 26
## 119 NA 153 5.7 88 8 27
## 120 76 203 9.7 97 8 28
## 121 118 225 2.3 94 8 29
## 122 84 237 6.3 96 8 30
## 123 85 188 6.3 94 8 31
## 124 96 167 6.9 91 9 1
## 125 78 197 5.1 92 9 2
## 126 73 183 2.8 93 9 3
## 127 91 189 4.6 93 9 4
## 128 47 95 7.4 87 9 5
## 129 32 92 15.5 84 9 6
## 130 20 252 10.9 80 9 7
## 131 23 220 10.3 78 9 8
## 132 21 230 10.9 75 9 9
## 133 24 259 9.7 73 9 10
## 134 44 236 14.9 81 9 11
## 135 21 259 15.5 76 9 12
## 136 28 238 6.3 77 9 13
## 137 9 24 10.9 71 9 14
## 138 13 112 11.5 71 9 15
## 139 46 237 6.9 78 9 16
## 140 18 224 13.8 67 9 17
## 141 13 27 10.3 76 9 18
## 142 24 238 10.3 68 9 19
## 143 16 201 8.0 82 9 20
## 144 13 238 12.6 64 9 21
## 145 23 14 9.2 71 9 22
## 146 36 139 10.3 81 9 23
## 147 7 49 10.3 69 9 24
## 148 14 20 16.6 63 9 25
## 149 30 193 6.9 70 9 26
## 150 NA 145 13.2 77 9 27
## 151 14 191 14.3 75 9 28
## 152 18 131 8.0 76 9 29
## 153 20 223 11.5 68 9 30</code></pre>
</div>
<div id="discussion" class="section level3 hasAnchor" number="2.3.2">
<h3><span class="header-section-number">2.3.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>You can use <code>dplyr::collect()</code> to return a tibble or <code>as.data.frame()</code> to return a <code>data.frame</code>.</p>
</div>
</div>
<div id="write-a-parquet-file" class="section level2 hasAnchor" number="2.4">
<h2><span class="header-section-number">2.4</span> Write a Parquet file<a href="reading-and-writing-data---single-files.html#write-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to write a single Parquet file to disk.</p>
<div id="solution-2" class="section level3 hasAnchor" number="2.4.1">
<h3><span class="header-section-number">2.4.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-2" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="reading-and-writing-data---single-files.html#cb5-1" tabindex="-1"></a><span class="co"># Create table</span></span>
<span id="cb5-2"><a href="reading-and-writing-data---single-files.html#cb5-2" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">&quot;A&quot;</span>, <span class="st">&quot;B&quot;</span>, <span class="st">&quot;C&quot;</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span>
<span id="cb5-3"><a href="reading-and-writing-data---single-files.html#cb5-3" tabindex="-1"></a><span class="co"># Write to Parquet</span></span>
<span id="cb5-4"><a href="reading-and-writing-data---single-files.html#cb5-4" tabindex="-1"></a><span class="fu">write_parquet</span>(my_table, <span class="st">&quot;my_table.parquet&quot;</span>)</span></code></pre></div>
</div>
</div>
<div id="read-a-parquet-file" class="section level2 hasAnchor" number="2.5">
<h2><span class="header-section-number">2.5</span> Read a Parquet file<a href="reading-and-writing-data---single-files.html#read-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read a single Parquet file into memory.</p>
<div id="solution-3" class="section level3 hasAnchor" number="2.5.1">
<h3><span class="header-section-number">2.5.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-3" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="reading-and-writing-data---single-files.html#cb6-1" tabindex="-1"></a>parquet_tbl <span class="ot">&lt;-</span> <span class="fu">read_parquet</span>(<span class="st">&quot;my_table.parquet&quot;</span>)</span>
<span id="cb6-2"><a href="reading-and-writing-data---single-files.html#cb6-2" tabindex="-1"></a>parquet_tbl</span></code></pre></div>
<pre><code>## # A tibble: 3 × 2
## group score
## &lt;chr&gt; &lt;dbl&gt;
## 1 A 99
## 2 B 97
## 3 C 99</code></pre>
<p>As the argument <code>as_data_frame</code> was left set to its default value of <code>TRUE</code>, the file was read in as a tibble.</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="reading-and-writing-data---single-files.html#cb8-1" tabindex="-1"></a><span class="fu">class</span>(parquet_tbl)</span></code></pre></div>
<pre><code>## [1] &quot;tbl_df&quot; &quot;tbl&quot; &quot;data.frame&quot;</code></pre>
</div>
<div id="discussion-1" class="section level3 hasAnchor" number="2.5.2">
<h3><span class="header-section-number">2.5.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-1" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>If you set <code>as_data_frame</code> to <code>FALSE</code>, the file will be read in as an Arrow Table.</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="reading-and-writing-data---single-files.html#cb10-1" tabindex="-1"></a>my_table_arrow <span class="ot">&lt;-</span> <span class="fu">read_parquet</span>(<span class="st">&quot;my_table.parquet&quot;</span>, <span class="at">as_data_frame =</span> <span class="cn">FALSE</span>)</span>
<span id="cb10-2"><a href="reading-and-writing-data---single-files.html#cb10-2" tabindex="-1"></a>my_table_arrow</span></code></pre></div>
<pre><code>## Table
## 3 rows x 2 columns
## $group &lt;string&gt;
## $score &lt;double&gt;</code></pre>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="reading-and-writing-data---single-files.html#cb12-1" tabindex="-1"></a><span class="fu">class</span>(my_table_arrow)</span></code></pre></div>
<pre><code>## [1] &quot;Table&quot; &quot;ArrowTabular&quot; &quot;ArrowObject&quot; &quot;R6&quot;</code></pre>
</div>
</div>
<div id="read-a-parquet-file-from-s3" class="section level2 hasAnchor" number="2.6">
<h2><span class="header-section-number">2.6</span> Read a Parquet file from S3<a href="reading-and-writing-data---single-files.html#read-a-parquet-file-from-s3" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read a single Parquet file from S3 into memory.</p>
<div id="solution-4" class="section level3 hasAnchor" number="2.6.1">
<h3><span class="header-section-number">2.6.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-4" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="reading-and-writing-data---single-files.html#cb14-1" tabindex="-1"></a>df <span class="ot">&lt;-</span> <span class="fu">read_parquet</span>(<span class="at">file =</span> <span class="st">&quot;s3://voltrondata-labs-datasets/nyc-taxi/year=2019/month=6/part-0.parquet&quot;</span>)</span></code></pre></div>
</div>
<div id="see-also" class="section level3 hasAnchor" number="2.6.2">
<h3><span class="header-section-number">2.6.2</span> See also<a href="reading-and-writing-data---single-files.html#see-also" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>For more in-depth instructions, including how to work with S3 buckets which require authentication, you can find a guide to reading and writing to/from S3 buckets here: <a href="https://arrow.apache.org/docs/r/articles/fs.html" class="uri">https://arrow.apache.org/docs/r/articles/fs.html</a>.</p>
</div>
</div>
<div id="filter-columns-while-reading-a-parquet-file" class="section level2 hasAnchor" number="2.7">
<h2><span class="header-section-number">2.7</span> Filter columns while reading a Parquet file<a href="reading-and-writing-data---single-files.html#filter-columns-while-reading-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to specify which columns to include when reading a single Parquet file into memory.</p>
<div id="solution-5" class="section level3 hasAnchor" number="2.7.1">
<h3><span class="header-section-number">2.7.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-5" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="reading-and-writing-data---single-files.html#cb15-1" tabindex="-1"></a><span class="co"># Create table to read back in</span></span>
<span id="cb15-2"><a href="reading-and-writing-data---single-files.html#cb15-2" tabindex="-1"></a>dist_time <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">distance =</span> <span class="fu">c</span>(<span class="fl">12.2</span>, <span class="fl">15.7</span>, <span class="fl">14.2</span>), <span class="at">time =</span> <span class="fu">c</span>(<span class="dv">43</span>, <span class="dv">44</span>, <span class="dv">40</span>)))</span>
<span id="cb15-3"><a href="reading-and-writing-data---single-files.html#cb15-3" tabindex="-1"></a><span class="co"># Write to Parquet</span></span>
<span id="cb15-4"><a href="reading-and-writing-data---single-files.html#cb15-4" tabindex="-1"></a><span class="fu">write_parquet</span>(dist_time, <span class="st">&quot;dist_time.parquet&quot;</span>)</span>
<span id="cb15-5"><a href="reading-and-writing-data---single-files.html#cb15-5" tabindex="-1"></a></span>
<span id="cb15-6"><a href="reading-and-writing-data---single-files.html#cb15-6" tabindex="-1"></a><span class="co"># Read in only the &quot;time&quot; column</span></span>
<span id="cb15-7"><a href="reading-and-writing-data---single-files.html#cb15-7" tabindex="-1"></a>time_only <span class="ot">&lt;-</span> <span class="fu">read_parquet</span>(<span class="st">&quot;dist_time.parquet&quot;</span>, <span class="at">col_select =</span> <span class="st">&quot;time&quot;</span>)</span>
<span id="cb15-8"><a href="reading-and-writing-data---single-files.html#cb15-8" tabindex="-1"></a>time_only</span></code></pre></div>
<pre><code>## # A tibble: 3 × 1
## time
## &lt;dbl&gt;
## 1 43
## 2 44
## 3 40</code></pre>
</div>
</div>
<div id="write-a-feather-v2arrow-ipc-file" class="section level2 hasAnchor" number="2.8">
<h2><span class="header-section-number">2.8</span> Write a Feather V2/Arrow IPC file<a href="reading-and-writing-data---single-files.html#write-a-feather-v2arrow-ipc-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to write a single Feather V2 file (also called Arrow IPC file).</p>
<div id="solution-6" class="section level3 hasAnchor" number="2.8.1">
<h3><span class="header-section-number">2.8.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-6" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="reading-and-writing-data---single-files.html#cb17-1" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">&quot;A&quot;</span>, <span class="st">&quot;B&quot;</span>, <span class="st">&quot;C&quot;</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span>
<span id="cb17-2"><a href="reading-and-writing-data---single-files.html#cb17-2" tabindex="-1"></a><span class="fu">write_feather</span>(my_table, <span class="st">&quot;my_table.arrow&quot;</span>)</span></code></pre></div>
</div>
<div id="discussion-2" class="section level3 hasAnchor" number="2.8.2">
<h3><span class="header-section-number">2.8.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-2" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>For legacy support, you can write data in the original Feather format by setting the <code>version</code> parameter to <code>1</code>.</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="reading-and-writing-data---single-files.html#cb18-1" tabindex="-1"></a><span class="co"># Create table</span></span>
<span id="cb18-2"><a href="reading-and-writing-data---single-files.html#cb18-2" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">&quot;A&quot;</span>, <span class="st">&quot;B&quot;</span>, <span class="st">&quot;C&quot;</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span>
<span id="cb18-3"><a href="reading-and-writing-data---single-files.html#cb18-3" tabindex="-1"></a><span class="co"># Write to Feather format V1</span></span>
<span id="cb18-4"><a href="reading-and-writing-data---single-files.html#cb18-4" tabindex="-1"></a><span class="fu">write_feather</span>(my_table, <span class="st">&quot;my_table.feather&quot;</span>, <span class="at">version =</span> <span class="dv">1</span>)</span></code></pre></div>
</div>
</div>
<div id="read-a-featherarrow-ipc-file" class="section level2 hasAnchor" number="2.9">
<h2><span class="header-section-number">2.9</span> Read a Feather/Arrow IPC file<a href="reading-and-writing-data---single-files.html#read-a-featherarrow-ipc-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read a single Feather V1 or V2 file (also called an Arrow IPC file) into memory.</p>
<div id="solution-7" class="section level3 hasAnchor" number="2.9.1">
<h3><span class="header-section-number">2.9.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-7" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="reading-and-writing-data---single-files.html#cb19-1" tabindex="-1"></a>my_feather_tbl <span class="ot">&lt;-</span> <span class="fu">read_feather</span>(<span class="st">&quot;my_table.arrow&quot;</span>)</span></code></pre></div>
</div>
</div>
<div id="write-streaming-arrow-ipc-files" class="section level2 hasAnchor" number="2.10">
<h2><span class="header-section-number">2.10</span> Write streaming Arrow IPC files<a href="reading-and-writing-data---single-files.html#write-streaming-arrow-ipc-files" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to write to the Arrow IPC stream format.</p>
<div id="solution-8" class="section level3 hasAnchor" number="2.10.1">
<h3><span class="header-section-number">2.10.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-8" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="reading-and-writing-data---single-files.html#cb20-1" tabindex="-1"></a><span class="co"># Create table</span></span>
<span id="cb20-2"><a href="reading-and-writing-data---single-files.html#cb20-2" tabindex="-1"></a>my_table <span class="ot">&lt;-</span> <span class="fu">arrow_table</span>(</span>
<span id="cb20-3"><a href="reading-and-writing-data---single-files.html#cb20-3" tabindex="-1"></a> <span class="fu">data.frame</span>(</span>
<span id="cb20-4"><a href="reading-and-writing-data---single-files.html#cb20-4" tabindex="-1"></a> <span class="at">group =</span> <span class="fu">c</span>(<span class="st">&quot;A&quot;</span>, <span class="st">&quot;B&quot;</span>, <span class="st">&quot;C&quot;</span>),</span>
<span id="cb20-5"><a href="reading-and-writing-data---single-files.html#cb20-5" tabindex="-1"></a> <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)</span>
<span id="cb20-6"><a href="reading-and-writing-data---single-files.html#cb20-6" tabindex="-1"></a> )</span>
<span id="cb20-7"><a href="reading-and-writing-data---single-files.html#cb20-7" tabindex="-1"></a>)</span>
<span id="cb20-8"><a href="reading-and-writing-data---single-files.html#cb20-8" tabindex="-1"></a><span class="co"># Write to IPC stream format</span></span>
<span id="cb20-9"><a href="reading-and-writing-data---single-files.html#cb20-9" tabindex="-1"></a><span class="fu">write_ipc_stream</span>(my_table, <span class="st">&quot;my_table.arrows&quot;</span>)</span></code></pre></div>
</div>
</div>
<div id="read-streaming-arrow-ipc-files" class="section level2 hasAnchor" number="2.11">
<h2><span class="header-section-number">2.11</span> Read streaming Arrow IPC files<a href="reading-and-writing-data---single-files.html#read-streaming-arrow-ipc-files" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read from the Arrow IPC stream format.</p>
<div id="solution-9" class="section level3 hasAnchor" number="2.11.1">
<h3><span class="header-section-number">2.11.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-9" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="reading-and-writing-data---single-files.html#cb21-1" tabindex="-1"></a>my_ipc_stream <span class="ot">&lt;-</span> arrow<span class="sc">::</span><span class="fu">read_ipc_stream</span>(<span class="st">&quot;my_table.arrows&quot;</span>)</span></code></pre></div>
</div>
</div>
<div id="write-a-csv-file" class="section level2 hasAnchor" number="2.12">
<h2><span class="header-section-number">2.12</span> Write a CSV file<a href="reading-and-writing-data---single-files.html#write-a-csv-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to write Arrow data to a single CSV file.</p>
<div id="solution-10" class="section level3 hasAnchor" number="2.12.1">
<h3><span class="header-section-number">2.12.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-10" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="reading-and-writing-data---single-files.html#cb22-1" tabindex="-1"></a><span class="fu">write_csv_arrow</span>(cars, <span class="st">&quot;cars.csv&quot;</span>)</span></code></pre></div>
</div>
</div>
<div id="read-a-csv-file" class="section level2 hasAnchor" number="2.13">
<h2><span class="header-section-number">2.13</span> Read a CSV file<a href="reading-and-writing-data---single-files.html#read-a-csv-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read a single CSV file into memory.</p>
<div id="solution-11" class="section level3 hasAnchor" number="2.13.1">
<h3><span class="header-section-number">2.13.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-11" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="reading-and-writing-data---single-files.html#cb23-1" tabindex="-1"></a>my_csv <span class="ot">&lt;-</span> <span class="fu">read_csv_arrow</span>(<span class="st">&quot;cars.csv&quot;</span>, <span class="at">as_data_frame =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
</div>
</div>
<div id="read-a-json-file" class="section level2 hasAnchor" number="2.14">
<h2><span class="header-section-number">2.14</span> Read a JSON file<a href="reading-and-writing-data---single-files.html#read-a-json-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read a JSON file into memory.</p>
<div id="solution-12" class="section level3 hasAnchor" number="2.14.1">
<h3><span class="header-section-number">2.14.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-12" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="reading-and-writing-data---single-files.html#cb24-1" tabindex="-1"></a><span class="co"># Create a file to read back in</span></span>
<span id="cb24-2"><a href="reading-and-writing-data---single-files.html#cb24-2" tabindex="-1"></a>tf <span class="ot">&lt;-</span> <span class="fu">tempfile</span>()</span>
<span id="cb24-3"><a href="reading-and-writing-data---single-files.html#cb24-3" tabindex="-1"></a><span class="fu">writeLines</span>(<span class="st">&#39;</span></span>
<span id="cb24-4"><a href="reading-and-writing-data---single-files.html#cb24-4" tabindex="-1"></a><span class="st"> {&quot;country&quot;: &quot;United Kingdom&quot;, &quot;code&quot;: &quot;GB&quot;, &quot;long&quot;: -3.44, &quot;lat&quot;: 55.38}</span></span>
<span id="cb24-5"><a href="reading-and-writing-data---single-files.html#cb24-5" tabindex="-1"></a><span class="st"> {&quot;country&quot;: &quot;France&quot;, &quot;code&quot;: &quot;FR&quot;, &quot;long&quot;: 2.21, &quot;lat&quot;: 46.23}</span></span>
<span id="cb24-6"><a href="reading-and-writing-data---single-files.html#cb24-6" tabindex="-1"></a><span class="st"> {&quot;country&quot;: &quot;Germany&quot;, &quot;code&quot;: &quot;DE&quot;, &quot;long&quot;: 10.45, &quot;lat&quot;: 51.17}</span></span>
<span id="cb24-7"><a href="reading-and-writing-data---single-files.html#cb24-7" tabindex="-1"></a><span class="st"> &#39;</span>, tf, <span class="at">useBytes =</span> <span class="cn">TRUE</span>)</span>
<span id="cb24-8"><a href="reading-and-writing-data---single-files.html#cb24-8" tabindex="-1"></a></span>
<span id="cb24-9"><a href="reading-and-writing-data---single-files.html#cb24-9" tabindex="-1"></a><span class="co"># Read in the data</span></span>
<span id="cb24-10"><a href="reading-and-writing-data---single-files.html#cb24-10" tabindex="-1"></a>countries <span class="ot">&lt;-</span> <span class="fu">read_json_arrow</span>(tf, <span class="at">col_select =</span> <span class="fu">c</span>(<span class="st">&quot;country&quot;</span>, <span class="st">&quot;long&quot;</span>, <span class="st">&quot;lat&quot;</span>))</span>
<span id="cb24-11"><a href="reading-and-writing-data---single-files.html#cb24-11" tabindex="-1"></a>countries</span></code></pre></div>
<pre><code>## # A tibble: 3 × 3
##   country         long   lat
##   &lt;chr&gt;          &lt;dbl&gt; &lt;dbl&gt;
## 1 United Kingdom -3.44  55.4
## 2 France          2.21  46.2
## 3 Germany        10.4   51.2</code></pre>
</div>
</div>
<div id="write-a-compressed-single-data-file" class="section level2 hasAnchor" number="2.15">
<h2><span class="header-section-number">2.15</span> Write a compressed single data file<a href="reading-and-writing-data---single-files.html#write-a-compressed-single-data-file" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to save a single file, compressed with a specified compression algorithm.</p>
<div id="solution-13" class="section level3 hasAnchor" number="2.15.1">
<h3><span class="header-section-number">2.15.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-13" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="reading-and-writing-data---single-files.html#cb26-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span>
<span id="cb26-2"><a href="reading-and-writing-data---single-files.html#cb26-2" tabindex="-1"></a>td <span class="ot">&lt;-</span> <span class="fu">tempfile</span>()</span>
<span id="cb26-3"><a href="reading-and-writing-data---single-files.html#cb26-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span>
<span id="cb26-4"><a href="reading-and-writing-data---single-files.html#cb26-4" tabindex="-1"></a></span>
<span id="cb26-5"><a href="reading-and-writing-data---single-files.html#cb26-5" tabindex="-1"></a><span class="co"># Write data compressed with the gzip algorithm instead of the default</span></span>
<span id="cb26-6"><a href="reading-and-writing-data---single-files.html#cb26-6" tabindex="-1"></a><span class="fu">write_parquet</span>(iris, <span class="fu">file.path</span>(td, <span class="st">&quot;iris.parquet&quot;</span>), <span class="at">compression =</span> <span class="st">&quot;gzip&quot;</span>)</span></code></pre></div>
</div>
<div id="see-also-1" class="section level3 hasAnchor" number="2.15.2">
<h3><span class="header-section-number">2.15.2</span> See also<a href="reading-and-writing-data---single-files.html#see-also-1" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>Some formats write compressed data by default. For more information
on the supported compression algorithms and default settings, see:</p>
<ul>
<li><code>?write_parquet()</code></li>
<li><code>?write_feather()</code></li>
</ul>
</div>
</div>
<div id="read-compressed-data" class="section level2 hasAnchor" number="2.16">
<h2><span class="header-section-number">2.16</span> Read compressed data<a href="reading-and-writing-data---single-files.html#read-compressed-data" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to read in a single data file which has been compressed.</p>
<div id="solution-14" class="section level3 hasAnchor" number="2.16.1">
<h3><span class="header-section-number">2.16.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-14" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="reading-and-writing-data---single-files.html#cb27-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span>
<span id="cb27-2"><a href="reading-and-writing-data---single-files.html#cb27-2" tabindex="-1"></a>td <span class="ot">&lt;-</span> <span class="fu">tempfile</span>()</span>
<span id="cb27-3"><a href="reading-and-writing-data---single-files.html#cb27-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span>
<span id="cb27-4"><a href="reading-and-writing-data---single-files.html#cb27-4" tabindex="-1"></a></span>
<span id="cb27-5"><a href="reading-and-writing-data---single-files.html#cb27-5" tabindex="-1"></a><span class="co"># Write data which is to be read back in</span></span>
<span id="cb27-6"><a href="reading-and-writing-data---single-files.html#cb27-6" tabindex="-1"></a><span class="fu">write_parquet</span>(iris, <span class="fu">file.path</span>(td, <span class="st">&quot;iris.parquet&quot;</span>), <span class="at">compression =</span> <span class="st">&quot;gzip&quot;</span>)</span>
<span id="cb27-7"><a href="reading-and-writing-data---single-files.html#cb27-7" tabindex="-1"></a></span>
<span id="cb27-8"><a href="reading-and-writing-data---single-files.html#cb27-8" tabindex="-1"></a><span class="co"># Read in data</span></span>
<span id="cb27-9"><a href="reading-and-writing-data---single-files.html#cb27-9" tabindex="-1"></a>ds <span class="ot">&lt;-</span> <span class="fu">read_parquet</span>(<span class="fu">file.path</span>(td, <span class="st">&quot;iris.parquet&quot;</span>))</span>
<span id="cb27-10"><a href="reading-and-writing-data---single-files.html#cb27-10" tabindex="-1"></a>ds</span></code></pre></div>
<pre><code>## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           &lt;dbl&gt;       &lt;dbl&gt;        &lt;dbl&gt;       &lt;dbl&gt; &lt;fct&gt;
##  1          5.1         3.5          1.4         0.2 setosa
##  2          4.9         3            1.4         0.2 setosa
##  3          4.7         3.2          1.3         0.2 setosa
##  4          4.6         3.1          1.5         0.2 setosa
##  5          5           3.6          1.4         0.2 setosa
##  6          5.4         3.9          1.7         0.4 setosa
##  7          4.6         3.4          1.4         0.3 setosa
##  8          5           3.4          1.5         0.2 setosa
##  9          4.4         2.9          1.4         0.2 setosa
## 10          4.9         3.1          1.5         0.1 setosa
## # ℹ 140 more rows</code></pre>
</div>
<div id="discussion-3" class="section level3 hasAnchor" number="2.16.2">
<h3><span class="header-section-number">2.16.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-3" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>Arrow detects the compression automatically, so you do not need to specify
it when calling the <code>read_*()</code> or <code>open_dataset()</code> functions.</p>
<p>Although the CSV format has no built-in compression, Arrow can read a CSV file
that has been gzip-compressed, provided the file extension is <code>.gz</code>.</p>
<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="reading-and-writing-data---single-files.html#cb29-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span>
<span id="cb29-2"><a href="reading-and-writing-data---single-files.html#cb29-2" tabindex="-1"></a>td <span class="ot">&lt;-</span> <span class="fu">tempfile</span>()</span>
<span id="cb29-3"><a href="reading-and-writing-data---single-files.html#cb29-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span>
<span id="cb29-4"><a href="reading-and-writing-data---single-files.html#cb29-4" tabindex="-1"></a></span>
<span id="cb29-5"><a href="reading-and-writing-data---single-files.html#cb29-5" tabindex="-1"></a><span class="co"># Write data which is to be read back in</span></span>
<span id="cb29-6"><a href="reading-and-writing-data---single-files.html#cb29-6" tabindex="-1"></a><span class="fu">write.csv</span>(iris, <span class="fu">gzfile</span>(<span class="fu">file.path</span>(td, <span class="st">&quot;iris.csv.gz&quot;</span>)), <span class="at">row.names =</span> <span class="cn">FALSE</span>, <span class="at">quote =</span> <span class="cn">FALSE</span>)</span>
<span id="cb29-7"><a href="reading-and-writing-data---single-files.html#cb29-7" tabindex="-1"></a></span>
<span id="cb29-8"><a href="reading-and-writing-data---single-files.html#cb29-8" tabindex="-1"></a><span class="co"># Read in data</span></span>
<span id="cb29-9"><a href="reading-and-writing-data---single-files.html#cb29-9" tabindex="-1"></a>ds <span class="ot">&lt;-</span> <span class="fu">read_csv_arrow</span>(<span class="fu">file.path</span>(td, <span class="st">&quot;iris.csv.gz&quot;</span>))</span>
<span id="cb29-10"><a href="reading-and-writing-data---single-files.html#cb29-10" tabindex="-1"></a>ds</span></code></pre></div>
<pre><code>## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           &lt;dbl&gt;       &lt;dbl&gt;        &lt;dbl&gt;       &lt;dbl&gt; &lt;chr&gt;
##  1          5.1         3.5          1.4         0.2 setosa
##  2          4.9         3            1.4         0.2 setosa
##  3          4.7         3.2          1.3         0.2 setosa
##  4          4.6         3.1          1.5         0.2 setosa
##  5          5           3.6          1.4         0.2 setosa
##  6          5.4         3.9          1.7         0.4 setosa
##  7          4.6         3.4          1.4         0.3 setosa
##  8          5           3.4          1.5         0.2 setosa
##  9          4.4         2.9          1.4         0.2 setosa
## 10          4.9         3.1          1.5         0.1 setosa
## # ℹ 140 more rows</code></pre>
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
</div>
</div>
</div>
</section>
</div>
</div>
</div>
<a href="index.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="reading-and-writing-data---multiple-files.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Start the GitBook reader for this page once the "gitbook" module resolves.
// Each top-level settings group is built as a named local first and then
// handed to start() under the same keys, in the same order, as before.
gitbook.require(["gitbook"], function (book) {
  // Per-network boolean flags plus the "all" list, passed under "sharing".
  var sharingSettings = {
    github: false,
    facebook: true,
    twitter: true,
    linkedin: false,
    weibo: false,
    instapaper: false,
    vk: false,
    whatsapp: false,
    all: ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
  };

  // Theme, font family and size values passed under "fontsettings".
  var fontSettings = {
    theme: "white",
    family: "sans",
    size: 2
  };

  // "edit" links to this chapter's .Rmd file in the arrow-cookbook repository.
  var editSettings = {
    link: "https://github.com/apache/arrow-cookbook/edit/main/r/content/reading_and_writing_data.Rmd",
    text: "Edit"
  };

  // "history" and "view" are given neither a link nor a label on this page.
  var historySettings = { link: null, text: null };
  var viewSettings = { link: null, text: null };

  // Search uses the "fuse" engine with no extra options.
  var searchSettings = {
    engine: "fuse",
    options: null
  };

  var tocSettings = { collapse: "subsection" };

  book.start({
    sharing: sharingSettings,
    fontsettings: fontSettings,
    edit: editSettings,
    history: historySettings,
    view: viewSettings,
    download: null,
    search: searchSettings,
    toc: tocSettings
  });
});
</script>
</body>
</html>