blob: fd0277f5d02ee21db0b16e57c5c6b90b40876f0a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>
<!-- Document head for the "Manipulating Data - Tables" chapter: metadata, chapter navigation links, script/stylesheet includes, and inline code-block styles. -->
<!-- Character encoding and legacy IE rendering-mode hint. -->
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<!-- Page title, description and generator metadata. -->
<title>7 Manipulating Data - Tables | Apache Arrow R Cookbook</title>
<meta name="description" content="7 Manipulating Data - Tables | Apache Arrow R Cookbook" />
<meta name="generator" content="bookdown 0.38 and GitBook 2.6.7" />
<!-- Open Graph and Twitter card metadata (title and type only). -->
<meta property="og:title" content="7 Manipulating Data - Tables | Apache Arrow R Cookbook" />
<meta property="og:type" content="book" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="7 Manipulating Data - Tables | Apache Arrow R Cookbook" />
<!-- Mobile viewport and iOS web-app presentation hints. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<!-- Sequential navigation: previous chapter (6, Arrays) and next chapter (8, PyArrow). -->
<link rel="prev" href="manipulating-data---arrays.html"/>
<link rel="next" href="using-pyarrow-from-r.html"/>
<!-- Scripts: jQuery is served from the local libs/ directory; Fuse.js is loaded from the jsDelivr CDN. -->
<script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/fuse.js@6.4.6/dist/fuse.min.js"></script>
<!-- GitBook base stylesheet followed by its plugin stylesheets. -->
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
<!-- anchor-sections assets (styles and script). -->
<link href="libs/anchor-sections-1.1.0/anchor-sections.css" rel="stylesheet" />
<link href="libs/anchor-sections-1.1.0/anchor-sections-hash.css" rel="stylesheet" />
<script src="libs/anchor-sections-1.1.0/anchor-sections.js"></script>
<style type="text/css">
/* Layout of highlighted code blocks: each direct <span> child of code.sourceCode is styled as one unit (line-height, minimum height when empty). */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
pre.sourceCode { margin: 0; }
/* On screen, the wrapping div scrolls when its content overflows. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, long lines wrap; the negative text-indent plus equal padding gives wrapped continuations a hanging indent. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Optional line numbering (pre.numberSource): a CSS counter is reset per code element, incremented per span, and rendered through ::before on the first anchor of each span. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
/* user-select: none (with vendor prefixes) keeps the generated numbers out of text selection. */
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Empty rule: declares nothing and has no effect. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Syntax-highlighting colours, one rule per two-letter token class; the trailing comment on each rule names the token kind. */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<style type="text/css">
/* Hanging indent: the first line is pulled back by the same amount the block is indented. */
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
</style>
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Preface</a>
<ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#what-is-arrow"><i class="fa fa-check"></i><b>1.1</b> What is Arrow?</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#alternative-resources"><i class="fa fa-check"></i><b>1.2</b> Alternative resources</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html"><i class="fa fa-check"></i><b>2</b> Reading and Writing Data - Single Files</a>
<ul>
<li class="chapter" data-level="2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#introduction"><i class="fa fa-check"></i><b>2.1</b> Introduction</a></li>
<li class="chapter" data-level="2.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-a-data-frame-to-an-arrow-table"><i class="fa fa-check"></i><b>2.2</b> Convert data from a data frame to an Arrow Table</a>
<ul>
<li class="chapter" data-level="2.2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution"><i class="fa fa-check"></i><b>2.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.3" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-an-arrow-table-to-a-data-frame"><i class="fa fa-check"></i><b>2.3</b> Convert data from an Arrow Table to a data frame</a>
<ul>
<li class="chapter" data-level="2.3.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-1"><i class="fa fa-check"></i><b>2.3.1</b> Solution</a></li>
<li class="chapter" data-level="2.3.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion"><i class="fa fa-check"></i><b>2.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.4" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-parquet-file"><i class="fa fa-check"></i><b>2.4</b> Write a Parquet file</a>
<ul>
<li class="chapter" data-level="2.4.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-2"><i class="fa fa-check"></i><b>2.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.5" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file"><i class="fa fa-check"></i><b>2.5</b> Read a Parquet file</a>
<ul>
<li class="chapter" data-level="2.5.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-3"><i class="fa fa-check"></i><b>2.5.1</b> Solution</a></li>
<li class="chapter" data-level="2.5.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-1"><i class="fa fa-check"></i><b>2.5.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.6" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file-from-s3"><i class="fa fa-check"></i><b>2.6</b> Read a Parquet file from S3</a>
<ul>
<li class="chapter" data-level="2.6.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-4"><i class="fa fa-check"></i><b>2.6.1</b> Solution</a></li>
<li class="chapter" data-level="2.6.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also"><i class="fa fa-check"></i><b>2.6.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="2.7" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#filter-columns-while-reading-a-parquet-file"><i class="fa fa-check"></i><b>2.7</b> Filter columns while reading a Parquet file</a>
<ul>
<li class="chapter" data-level="2.7.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-5"><i class="fa fa-check"></i><b>2.7.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.8" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-feather-v2arrow-ipc-file"><i class="fa fa-check"></i><b>2.8</b> Write a Feather V2/Arrow IPC file</a>
<ul>
<li class="chapter" data-level="2.8.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-6"><i class="fa fa-check"></i><b>2.8.1</b> Solution</a></li>
<li class="chapter" data-level="2.8.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-2"><i class="fa fa-check"></i><b>2.8.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="2.9" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-featherarrow-ipc-file"><i class="fa fa-check"></i><b>2.9</b> Read a Feather/Arrow IPC file</a>
<ul>
<li class="chapter" data-level="2.9.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-7"><i class="fa fa-check"></i><b>2.9.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.10" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.10</b> Write streaming Arrow IPC files</a>
<ul>
<li class="chapter" data-level="2.10.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-8"><i class="fa fa-check"></i><b>2.10.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.11" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.11</b> Read streaming Arrow IPC files</a>
<ul>
<li class="chapter" data-level="2.11.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-9"><i class="fa fa-check"></i><b>2.11.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.12" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-csv-file"><i class="fa fa-check"></i><b>2.12</b> Write a CSV file</a>
<ul>
<li class="chapter" data-level="2.12.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-10"><i class="fa fa-check"></i><b>2.12.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.13" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-csv-file"><i class="fa fa-check"></i><b>2.13</b> Read a CSV file</a>
<ul>
<li class="chapter" data-level="2.13.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-11"><i class="fa fa-check"></i><b>2.13.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.14" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-json-file"><i class="fa fa-check"></i><b>2.14</b> Read a JSON file</a>
<ul>
<li class="chapter" data-level="2.14.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-12"><i class="fa fa-check"></i><b>2.14.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="2.15" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-compressed-single-data-file"><i class="fa fa-check"></i><b>2.15</b> Write a compressed single data file</a>
<ul>
<li class="chapter" data-level="2.15.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-13"><i class="fa fa-check"></i><b>2.15.1</b> Solution</a></li>
<li class="chapter" data-level="2.15.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also-1"><i class="fa fa-check"></i><b>2.15.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="2.16" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-compressed-data"><i class="fa fa-check"></i><b>2.16</b> Read compressed data</a>
<ul>
<li class="chapter" data-level="2.16.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-14"><i class="fa fa-check"></i><b>2.16.1</b> Solution</a></li>
<li class="chapter" data-level="2.16.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-3"><i class="fa fa-check"></i><b>2.16.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html"><i class="fa fa-check"></i><b>3</b> Reading and Writing Data - Multiple Files</a>
<ul>
<li class="chapter" data-level="3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#introduction-1"><i class="fa fa-check"></i><b>3.1</b> Introduction</a></li>
<li class="chapter" data-level="3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---parquet"><i class="fa fa-check"></i><b>3.2</b> Write data to disk - Parquet</a>
<ul>
<li class="chapter" data-level="3.2.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-15"><i class="fa fa-check"></i><b>3.2.1</b> Solution</a></li>
<li class="chapter" data-level="3.2.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-4"><i class="fa fa-check"></i><b>3.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-partitioned-data---parquet"><i class="fa fa-check"></i><b>3.3</b> Write partitioned data - Parquet</a>
<ul>
<li class="chapter" data-level="3.3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-16"><i class="fa fa-check"></i><b>3.3.1</b> Solution</a></li>
<li class="chapter" data-level="3.3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-5"><i class="fa fa-check"></i><b>3.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-partitioned-data"><i class="fa fa-check"></i><b>3.4</b> Read partitioned data</a>
<ul>
<li class="chapter" data-level="3.4.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-17"><i class="fa fa-check"></i><b>3.4.1</b> Solution</a></li>
<li class="chapter" data-level="3.4.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-6"><i class="fa fa-check"></i><b>3.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---featherarrow-ipc-format"><i class="fa fa-check"></i><b>3.5</b> Write data to disk - Feather/Arrow IPC format</a>
<ul>
<li class="chapter" data-level="3.5.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-18"><i class="fa fa-check"></i><b>3.5.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.6" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-featherarrow-ipc-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.6</b> Read in Feather/Arrow IPC data as an Arrow Dataset</a>
<ul>
<li class="chapter" data-level="3.6.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-19"><i class="fa fa-check"></i><b>3.6.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.7" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---csv-format"><i class="fa fa-check"></i><b>3.7</b> Write data to disk - CSV format</a>
<ul>
<li class="chapter" data-level="3.7.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-20"><i class="fa fa-check"></i><b>3.7.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.8" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-csv-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.8</b> Read in CSV data as an Arrow Dataset</a>
<ul>
<li class="chapter" data-level="3.8.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-21"><i class="fa fa-check"></i><b>3.8.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.9" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-a-csv-dataset-no-headers"><i class="fa fa-check"></i><b>3.9</b> Read in a CSV dataset (no headers)</a>
<ul>
<li class="chapter" data-level="3.9.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-22"><i class="fa fa-check"></i><b>3.9.1</b> Solution</a></li>
<li class="chapter" data-level="3.9.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-7"><i class="fa fa-check"></i><b>3.9.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.10" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-compressed-partitioned-data"><i class="fa fa-check"></i><b>3.10</b> Write compressed partitioned data</a>
<ul>
<li class="chapter" data-level="3.10.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-23"><i class="fa fa-check"></i><b>3.10.1</b> Solution</a></li>
<li class="chapter" data-level="3.10.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-8"><i class="fa fa-check"></i><b>3.10.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="3.11" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-compressed-data-1"><i class="fa fa-check"></i><b>3.11</b> Read compressed data</a>
<ul>
<li class="chapter" data-level="3.11.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-24"><i class="fa fa-check"></i><b>3.11.1</b> Solution</a></li>
<li class="chapter" data-level="3.11.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-9"><i class="fa fa-check"></i><b>3.11.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html"><i class="fa fa-check"></i><b>4</b> Creating Arrow Objects</a>
<ul>
<li class="chapter" data-level="4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-an-arrow-array-from-an-r-object"><i class="fa fa-check"></i><b>4.1</b> Create an Arrow Array from an R object</a>
<ul>
<li class="chapter" data-level="4.1.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-25"><i class="fa fa-check"></i><b>4.1.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.2" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-a-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>4.2</b> Create a Arrow Table from an R object</a>
<ul>
<li class="chapter" data-level="4.2.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-26"><i class="fa fa-check"></i><b>4.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.3" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#view-the-contents-of-an-arrow-table-or-recordbatch"><i class="fa fa-check"></i><b>4.3</b> View the contents of an Arrow Table or RecordBatch</a>
<ul>
<li class="chapter" data-level="4.3.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-27"><i class="fa fa-check"></i><b>4.3.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="4.4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#manually-create-a-recordbatch-from-an-r-object."><i class="fa fa-check"></i><b>4.4</b> Manually create a RecordBatch from an R object.</a>
<ul>
<li class="chapter" data-level="4.4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-28"><i class="fa fa-check"></i><b>4.4.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="defining-data-types.html"><a href="defining-data-types.html"><i class="fa fa-check"></i><b>5</b> Defining Data Types</a>
<ul>
<li class="chapter" data-level="5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#introduction-2"><i class="fa fa-check"></i><b>5.1</b> Introduction</a></li>
<li class="chapter" data-level="5.2" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-an-existing-arrow-array"><i class="fa fa-check"></i><b>5.2</b> Update data type of an existing Arrow Array</a>
<ul>
<li class="chapter" data-level="5.2.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-29"><i class="fa fa-check"></i><b>5.2.1</b> Solution</a></li>
<li class="chapter" data-level="5.2.2" data-path="defining-data-types.html"><a href="defining-data-types.html#discussion-10"><i class="fa fa-check"></i><b>5.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="5.3" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-a-field-in-an-existing-arrow-table"><i class="fa fa-check"></i><b>5.3</b> Update data type of a field in an existing Arrow Table</a>
<ul>
<li class="chapter" data-level="5.3.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-30"><i class="fa fa-check"></i><b>5.3.1</b> Solution</a></li>
<li class="chapter" data-level="5.3.2" data-path="defining-data-types.html"><a href="defining-data-types.html#no-compat-type"><i class="fa fa-check"></i><b>5.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-creating-an-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>5.4</b> Specify data types when creating an Arrow table from an R object</a>
<ul>
<li class="chapter" data-level="5.4.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-31"><i class="fa fa-check"></i><b>5.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="5.5" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-reading-in-files"><i class="fa fa-check"></i><b>5.5</b> Specify data types when reading in files</a>
<ul>
<li class="chapter" data-level="5.5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-32"><i class="fa fa-check"></i><b>5.5.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html"><i class="fa fa-check"></i><b>6</b> Manipulating Data - Arrays</a>
<ul>
<li class="chapter" data-level="6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#introduction-3"><i class="fa fa-check"></i><b>6.1</b> Introduction</a></li>
<li class="chapter" data-level="6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#filter-by-values-matching-a-predicate-or-mask"><i class="fa fa-check"></i><b>6.2</b> Filter by values matching a predicate or mask</a>
<ul>
<li class="chapter" data-level="6.2.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-33"><i class="fa fa-check"></i><b>6.2.1</b> Solution</a></li>
<li class="chapter" data-level="6.2.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-11"><i class="fa fa-check"></i><b>6.2.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#compute-meanminmax-etc-value-of-an-array"><i class="fa fa-check"></i><b>6.3</b> Compute Mean/Min/Max, etc value of an Array</a>
<ul>
<li class="chapter" data-level="6.3.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-34"><i class="fa fa-check"></i><b>6.3.1</b> Solution</a></li>
<li class="chapter" data-level="6.3.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-12"><i class="fa fa-check"></i><b>6.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.4" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#count-occurrences-of-elements-in-an-array"><i class="fa fa-check"></i><b>6.4</b> Count occurrences of elements in an Array</a>
<ul>
<li class="chapter" data-level="6.4.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-35"><i class="fa fa-check"></i><b>6.4.1</b> Solution</a></li>
<li class="chapter" data-level="6.4.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-13"><i class="fa fa-check"></i><b>6.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.5" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#apply-arithmetic-functions-to-arrays."><i class="fa fa-check"></i><b>6.5</b> Apply arithmetic functions to Arrays.</a>
<ul>
<li class="chapter" data-level="6.5.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-36"><i class="fa fa-check"></i><b>6.5.1</b> Solution</a></li>
<li class="chapter" data-level="6.5.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-14"><i class="fa fa-check"></i><b>6.5.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="6.6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#call-arrow-compute-functions-directly-on-arrays"><i class="fa fa-check"></i><b>6.6</b> Call Arrow compute functions directly on Arrays</a>
<ul>
<li class="chapter" data-level="6.6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-37"><i class="fa fa-check"></i><b>6.6.1</b> Solution</a></li>
<li class="chapter" data-level="6.6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-15"><i class="fa fa-check"></i><b>6.6.2</b> Discussion</a></li>
<li class="chapter" data-level="6.6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#see-also-2"><i class="fa fa-check"></i><b>6.6.3</b> See also</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="7" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html"><i class="fa fa-check"></i><b>7</b> Manipulating Data - Tables</a>
<ul>
<li class="chapter" data-level="7.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#introduction-4"><i class="fa fa-check"></i><b>7.1</b> Introduction</a></li>
<li class="chapter" data-level="7.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.2</b> Use dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.2.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-38"><i class="fa fa-check"></i><b>7.2.1</b> Solution</a></li>
<li class="chapter" data-level="7.2.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-16"><i class="fa fa-check"></i><b>7.2.2</b> Discussion</a></li>
<li class="chapter" data-level="7.2.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#see-also-3"><i class="fa fa-check"></i><b>7.2.3</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="7.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-r-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.3</b> Use R functions in dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.3.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-39"><i class="fa fa-check"></i><b>7.3.1</b> Solution</a></li>
<li class="chapter" data-level="7.3.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-17"><i class="fa fa-check"></i><b>7.3.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="7.4" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-arrow-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.4</b> Use Arrow functions in dplyr verbs in Arrow</a>
<ul>
<li class="chapter" data-level="7.4.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-40"><i class="fa fa-check"></i><b>7.4.1</b> Solution</a></li>
<li class="chapter" data-level="7.4.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-18"><i class="fa fa-check"></i><b>7.4.2</b> Discussion</a></li>
</ul></li>
<li class="chapter" data-level="7.5" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#compute-window-aggregates"><i class="fa fa-check"></i><b>7.5</b> Compute Window Aggregates</a>
<ul>
<li class="chapter" data-level="7.5.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-41"><i class="fa fa-check"></i><b>7.5.1</b> Solution</a></li>
<li class="chapter" data-level="7.5.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discusson"><i class="fa fa-check"></i><b>7.5.2</b> Discussion</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="8" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html"><i class="fa fa-check"></i><b>8</b> Using PyArrow from R</a>
<ul>
<li class="chapter" data-level="8.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#introduction-5"><i class="fa fa-check"></i><b>8.1</b> Introduction</a></li>
<li class="chapter" data-level="8.2" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#create-an-arrow-object-using-pyarrow-in-r"><i class="fa fa-check"></i><b>8.2</b> Create an Arrow object using PyArrow in R</a>
<ul>
<li class="chapter" data-level="8.2.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-42"><i class="fa fa-check"></i><b>8.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="8.3" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#call-a-pyarrow-function-from-r"><i class="fa fa-check"></i><b>8.3</b> Call a PyArrow function from R</a>
<ul>
<li class="chapter" data-level="8.3.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-43"><i class="fa fa-check"></i><b>8.3.1</b> Solution</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="9" data-path="flight.html"><a href="flight.html"><i class="fa fa-check"></i><b>9</b> Flight</a>
<ul>
<li class="chapter" data-level="9.1" data-path="flight.html"><a href="flight.html#introduction-6"><i class="fa fa-check"></i><b>9.1</b> Introduction</a></li>
<li class="chapter" data-level="9.2" data-path="flight.html"><a href="flight.html#connect-to-a-flight-server"><i class="fa fa-check"></i><b>9.2</b> Connect to a Flight server</a>
<ul>
<li class="chapter" data-level="9.2.1" data-path="flight.html"><a href="flight.html#solution-44"><i class="fa fa-check"></i><b>9.2.1</b> Solution</a></li>
<li class="chapter" data-level="9.2.2" data-path="flight.html"><a href="flight.html#see-also-4"><i class="fa fa-check"></i><b>9.2.2</b> See also</a></li>
</ul></li>
<li class="chapter" data-level="9.3" data-path="flight.html"><a href="flight.html#send-data-to-a-flight-server"><i class="fa fa-check"></i><b>9.3</b> Send data to a Flight server</a>
<ul>
<li class="chapter" data-level="9.3.1" data-path="flight.html"><a href="flight.html#solution-45"><i class="fa fa-check"></i><b>9.3.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="9.4" data-path="flight.html"><a href="flight.html#check-what-resources-exist-on-a-flight-server"><i class="fa fa-check"></i><b>9.4</b> Check what resources exist on a Flight server</a>
<ul>
<li class="chapter" data-level="9.4.1" data-path="flight.html"><a href="flight.html#solution-46"><i class="fa fa-check"></i><b>9.4.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="9.5" data-path="flight.html"><a href="flight.html#retrieve-data-from-a-flight-server"><i class="fa fa-check"></i><b>9.5</b> Retrieve data from a Flight server</a>
<ul>
<li class="chapter" data-level="9.5.1" data-path="flight.html"><a href="flight.html#solution-47"><i class="fa fa-check"></i><b>9.5.1</b> Solution</a></li>
</ul></li>
</ul></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Apache Arrow R Cookbook</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div id="manipulating-data---tables" class="section level1 hasAnchor" number="7">
<h1><span class="header-section-number">7</span> Manipulating Data - Tables<a href="manipulating-data---tables.html#manipulating-data---tables" class="anchor-section" aria-label="Anchor link to header"></a></h1>
<div id="introduction-4" class="section level2 hasAnchor" number="7.1">
<h2><span class="header-section-number">7.1</span> Introduction<a href="manipulating-data---tables.html#introduction-4" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>One of the aims of the Arrow project is to reduce duplication between different
data frame implementations. The underlying implementation of a data frame is a
conceptually different thing to the code — or the application programming interface (API) — that you write to work with it.</p>
<p>You may have seen this before in packages like dbplyr which allow you to use
the dplyr API to interact with SQL databases.</p>
<p>The Arrow R package has been written so that the underlying Arrow Table-like
objects can be manipulated using the dplyr API, which allows you to use dplyr verbs.</p>
<p>For example, here’s a short pipeline of data manipulation which uses dplyr exclusively:</p>
<div class="sourceCode" id="cb103"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb103-1"><a href="manipulating-data---tables.html#cb103-1" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb103-2"><a href="manipulating-data---tables.html#cb103-2" tabindex="-1"></a>starwars <span class="sc">%&gt;%</span></span>
<span id="cb103-3"><a href="manipulating-data---tables.html#cb103-3" tabindex="-1"></a> <span class="fu">filter</span>(species <span class="sc">==</span> <span class="st">&quot;Human&quot;</span>) <span class="sc">%&gt;%</span></span>
<span id="cb103-4"><a href="manipulating-data---tables.html#cb103-4" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">height_ft =</span> height<span class="sc">/</span><span class="fl">30.48</span>) <span class="sc">%&gt;%</span></span>
<span id="cb103-5"><a href="manipulating-data---tables.html#cb103-5" tabindex="-1"></a> <span class="fu">select</span>(name, height_ft)</span></code></pre></div>
<pre><code>## # A tibble: 35 × 2
## name height_ft
## &lt;chr&gt; &lt;dbl&gt;
## 1 Luke Skywalker 5.64
## 2 Darth Vader 6.63
## 3 Leia Organa 4.92
## 4 Owen Lars 5.84
## 5 Beru Whitesun Lars 5.41
## 6 Biggs Darklighter 6.00
## 7 Obi-Wan Kenobi 5.97
## 8 Anakin Skywalker 6.17
## 9 Wilhuff Tarkin 5.91
## 10 Han Solo 5.91
## # ℹ 25 more rows</code></pre>
<p>And here is the same pipeline using Arrow with dplyr syntax, which gives the same results:</p>
<div class="sourceCode" id="cb105"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb105-1"><a href="manipulating-data---tables.html#cb105-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb105-2"><a href="manipulating-data---tables.html#cb105-2" tabindex="-1"></a> <span class="fu">filter</span>(species <span class="sc">==</span> <span class="st">&quot;Human&quot;</span>) <span class="sc">%&gt;%</span></span>
<span id="cb105-3"><a href="manipulating-data---tables.html#cb105-3" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">height_ft =</span> height<span class="sc">/</span><span class="fl">30.48</span>) <span class="sc">%&gt;%</span></span>
<span id="cb105-4"><a href="manipulating-data---tables.html#cb105-4" tabindex="-1"></a> <span class="fu">select</span>(name, height_ft) <span class="sc">%&gt;%</span></span>
<span id="cb105-5"><a href="manipulating-data---tables.html#cb105-5" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 35 × 2
## name height_ft
## &lt;chr&gt; &lt;dbl&gt;
## 1 Luke Skywalker 5.64
## 2 Darth Vader 6.63
## 3 Leia Organa 4.92
## 4 Owen Lars 5.84
## 5 Beru Whitesun Lars 5.41
## 6 Biggs Darklighter 6.00
## 7 Obi-Wan Kenobi 5.97
## 8 Anakin Skywalker 6.17
## 9 Wilhuff Tarkin 5.91
## 10 Han Solo 5.91
## # ℹ 25 more rows</code></pre>
<p>You’ll notice we’ve used <code>collect()</code> in the Arrow pipeline above. That’s because
one of the ways in which Arrow is efficient is that it works out the instructions
for the calculations it needs to perform (<em>expressions</em>) and only runs them
using Arrow once you actually pull the data into your R session. This means
instead of doing lots of separate operations, it does them all at once in a
more optimised way. This is called <em>lazy evaluation</em>.</p>
<p>It also means that you are able to manipulate data that is larger than you can
fit into memory on the machine you’re running your code on, if you only pull
data into R when you have selected the desired subset, or when using functions
which can operate on chunks of data.</p>
<p>You can also have data which is split across multiple files. For example, you
might have data which is stored in multiple Parquet or Feather files,
partitioned across different directories. You can open partitioned or multi-file datasets
using <code>open_dataset()</code> as discussed in a previous chapter, and then manipulate
this data using Arrow before even reading any of the data into R.</p>
</div>
<div id="use-dplyr-verbs-in-arrow" class="section level2 hasAnchor" number="7.2">
<h2><span class="header-section-number">7.2</span> Use dplyr verbs in Arrow<a href="manipulating-data---tables.html#use-dplyr-verbs-in-arrow" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to use a dplyr verb in Arrow.</p>
<div id="solution-38" class="section level3 hasAnchor" number="7.2.1">
<h3><span class="header-section-number">7.2.1</span> Solution<a href="manipulating-data---tables.html#solution-38" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb107"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb107-1"><a href="manipulating-data---tables.html#cb107-1" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb107-2"><a href="manipulating-data---tables.html#cb107-2" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb107-3"><a href="manipulating-data---tables.html#cb107-3" tabindex="-1"></a> <span class="fu">filter</span>(species <span class="sc">==</span> <span class="st">&quot;Human&quot;</span>, homeworld <span class="sc">==</span> <span class="st">&quot;Tatooine&quot;</span>) <span class="sc">%&gt;%</span></span>
<span id="cb107-4"><a href="manipulating-data---tables.html#cb107-4" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 8 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt;
## 1 Luke Sky… 172 77 blond fair blue 19 male mascu…
## 2 Darth Va… 202 136 none white yellow 41.9 male mascu…
## 3 Owen Lars 178 120 brown, gr… light blue 52 male mascu…
## 4 Beru Whi… 165 75 brown light blue 47 fema… femin…
## 5 Biggs Da… 183 84 black light brown 24 male mascu…
## 6 Anakin S… 188 84 blond fair blue 41.9 male mascu…
## 7 Shmi Sky… 163 NA black fair brown 72 fema… femin…
## 8 Cliegg L… 183 NA brown fair blue 82 male mascu…
## # ℹ 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&lt;character&gt;&gt;,
## # vehicles &lt;list&lt;character&gt;&gt;, starships &lt;list&lt;character&gt;&gt;</code></pre>
</div>
<div id="discussion-16" class="section level3 hasAnchor" number="7.2.2">
<h3><span class="header-section-number">7.2.2</span> Discussion<a href="manipulating-data---tables.html#discussion-16" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>You can use most of the dplyr verbs directly from Arrow.</p>
</div>
<div id="see-also-3" class="section level3 hasAnchor" number="7.2.3">
<h3><span class="header-section-number">7.2.3</span> See also<a href="manipulating-data---tables.html#see-also-3" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>You can find examples of the various dplyr verbs in “Introduction to dplyr” -
run <code>vignette("dplyr", package = "dplyr")</code> or view on
the <a href="https://dplyr.tidyverse.org/articles/dplyr.html">pkgdown site</a>.</p>
<p>You can see more information about using <code>arrow_table()</code> to create Arrow Tables
and <code>collect()</code> to view them as R data frames in <a href="creating-arrow-objects.html#creating-arrow-objects">Creating Arrow Objects</a>.</p>
</div>
</div>
<div id="use-r-functions-in-dplyr-verbs-in-arrow" class="section level2 hasAnchor" number="7.3">
<h2><span class="header-section-number">7.3</span> Use R functions in dplyr verbs in Arrow<a href="manipulating-data---tables.html#use-r-functions-in-dplyr-verbs-in-arrow" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to use an R function inside a dplyr verb in Arrow.</p>
<div id="solution-39" class="section level3 hasAnchor" number="7.3.1">
<h3><span class="header-section-number">7.3.1</span> Solution<a href="manipulating-data---tables.html#solution-39" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb109"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb109-1"><a href="manipulating-data---tables.html#cb109-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb109-2"><a href="manipulating-data---tables.html#cb109-2" tabindex="-1"></a> <span class="fu">filter</span>(<span class="fu">str_detect</span>(name, <span class="st">&quot;Darth&quot;</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb109-3"><a href="manipulating-data---tables.html#cb109-3" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 2 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt;
## 1 Darth Va… 202 136 none white yellow 41.9 male mascu…
## 2 Darth Ma… 175 80 none red yellow 54 male mascu…
## # ℹ 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&lt;character&gt;&gt;,
## # vehicles &lt;list&lt;character&gt;&gt;, starships &lt;list&lt;character&gt;&gt;</code></pre>
</div>
<div id="discussion-17" class="section level3 hasAnchor" number="7.3.2">
<h3><span class="header-section-number">7.3.2</span> Discussion<a href="manipulating-data---tables.html#discussion-17" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>The Arrow R package allows you to use dplyr verbs containing expressions which
include base R and many tidyverse functions, but call Arrow functions under the hood.
If you find any base R or tidyverse functions which you would like to see a
mapping of in Arrow, please
<a href="https://github.com/apache/arrow/issues">open an issue on the project’s GitHub repository</a>.</p>
<p>The following packages (amongst others) have had many function
bindings/mappings written in arrow:</p>
<ul>
<li><a href="https://lubridate.tidyverse.org/">lubridate</a></li>
<li><a href="https://stringr.tidyverse.org/">stringr</a></li>
<li><a href="https://dplyr.tidyverse.org/">dplyr</a></li>
</ul>
<p>If you try to call a function which does not have an Arrow mapping, the data will
be pulled back into R, and you will see a warning message.</p>
<div class="sourceCode" id="cb111"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb111-1"><a href="manipulating-data---tables.html#cb111-1" tabindex="-1"></a><span class="fu">library</span>(stringr)</span>
<span id="cb111-2"><a href="manipulating-data---tables.html#cb111-2" tabindex="-1"></a></span>
<span id="cb111-3"><a href="manipulating-data---tables.html#cb111-3" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb111-4"><a href="manipulating-data---tables.html#cb111-4" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">name_split =</span> <span class="fu">str_split_fixed</span>(name, <span class="st">&quot; &quot;</span>, <span class="dv">2</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb111-5"><a href="manipulating-data---tables.html#cb111-5" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## Warning: Expression str_split_fixed(name, &quot; &quot;, 2) not supported in Arrow;
## pulling data into R</code></pre>
<pre><code>## # A tibble: 87 × 15
## name height mass hair_color skin_color eye_color birth_year sex gender
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt;
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 &lt;NA&gt; gold yellow 112 none mascu…
## 3 R2-D2 96 32 &lt;NA&gt; white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 &lt;NA&gt; white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## # ℹ 77 more rows
## # ℹ 6 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&lt;character&gt;&gt;,
## # vehicles &lt;list&lt;character&gt;&gt;, starships &lt;list&lt;character&gt;&gt;,
## # name_split &lt;chr[,2]&gt;</code></pre>
</div>
</div>
<div id="use-arrow-functions-in-dplyr-verbs-in-arrow" class="section level2 hasAnchor" number="7.4">
<h2><span class="header-section-number">7.4</span> Use Arrow functions in dplyr verbs in Arrow<a href="manipulating-data---tables.html#use-arrow-functions-in-dplyr-verbs-in-arrow" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to use a function which is implemented in Arrow’s C++ library but either:</p>
<ul>
<li>it doesn’t have a mapping to a base R or tidyverse equivalent, or</li>
<li>it has a mapping but nevertheless you want to call the C++ function directly</li>
</ul>
<div id="solution-40" class="section level3 hasAnchor" number="7.4.1">
<h3><span class="header-section-number">7.4.1</span> Solution<a href="manipulating-data---tables.html#solution-40" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb114"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb114-1"><a href="manipulating-data---tables.html#cb114-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb114-2"><a href="manipulating-data---tables.html#cb114-2" tabindex="-1"></a> <span class="fu">select</span>(name) <span class="sc">%&gt;%</span></span>
<span id="cb114-3"><a href="manipulating-data---tables.html#cb114-3" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">padded_name =</span> <span class="fu">arrow_ascii_lpad</span>(name, <span class="at">options =</span> <span class="fu">list</span>(<span class="at">width =</span> <span class="dv">10</span>, <span class="at">padding =</span> <span class="st">&quot;*&quot;</span>))) <span class="sc">%&gt;%</span></span>
<span id="cb114-4"><a href="manipulating-data---tables.html#cb114-4" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 87 × 2
## name padded_name
## &lt;chr&gt; &lt;chr&gt;
## 1 Luke Skywalker Luke Skywalker
## 2 C-3PO *****C-3PO
## 3 R2-D2 *****R2-D2
## 4 Darth Vader Darth Vader
## 5 Leia Organa Leia Organa
## 6 Owen Lars *Owen Lars
## 7 Beru Whitesun Lars Beru Whitesun Lars
## 8 R5-D4 *****R5-D4
## 9 Biggs Darklighter Biggs Darklighter
## 10 Obi-Wan Kenobi Obi-Wan Kenobi
## # ℹ 77 more rows</code></pre>
</div>
<div id="discussion-18" class="section level3 hasAnchor" number="7.4.2">
<h3><span class="header-section-number">7.4.2</span> Discussion<a href="manipulating-data---tables.html#discussion-18" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>The vast majority of Arrow C++ compute functions have been mapped to their
base R or tidyverse equivalents, and we strongly recommend that you use
these mappings where possible, as the original functions are well documented
and the mapped versions have been tested to ensure the results returned are as
expected.</p>
<p>However, there may be circumstances in which you might want to use a compute
function from the Arrow C++ library which does not have a base R or tidyverse
equivalent.</p>
<p>You can find documentation of Arrow C++ compute functions in
<a href="https://arrow.apache.org/docs/cpp/compute.html#available-functions">the C++ documentation</a>.
This documentation lists all available compute functions, any associated options classes
they need, and the valid data types that they can be used with.</p>
<p>You can list all available Arrow compute functions from R by calling
<code>list_compute_functions()</code>.</p>
<div class="sourceCode" id="cb116"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb116-1"><a href="manipulating-data---tables.html#cb116-1" tabindex="-1"></a><span class="fu">list_compute_functions</span>()</span></code></pre></div>
<pre><code>## [1] &quot;abs&quot; &quot;abs_checked&quot;
## [3] &quot;acos&quot; &quot;acos_checked&quot;
## [5] &quot;add&quot; &quot;add_checked&quot;
## [7] &quot;all&quot; &quot;and&quot;
## [9] &quot;and_kleene&quot; &quot;and_not&quot;
## [11] &quot;and_not_kleene&quot; &quot;any&quot;
## [13] &quot;approximate_median&quot; &quot;array_filter&quot;
## [15] &quot;array_sort_indices&quot; &quot;array_take&quot;
## [17] &quot;ascii_capitalize&quot; &quot;ascii_center&quot;
## [19] &quot;ascii_is_alnum&quot; &quot;ascii_is_alpha&quot;
## [21] &quot;ascii_is_decimal&quot; &quot;ascii_is_lower&quot;
## [23] &quot;ascii_is_printable&quot; &quot;ascii_is_space&quot;
## [25] &quot;ascii_is_title&quot; &quot;ascii_is_upper&quot;
## [27] &quot;ascii_lower&quot; &quot;ascii_lpad&quot;
## [29] &quot;ascii_ltrim&quot; &quot;ascii_ltrim_whitespace&quot;
## [31] &quot;ascii_reverse&quot; &quot;ascii_rpad&quot;
## [33] &quot;ascii_rtrim&quot; &quot;ascii_rtrim_whitespace&quot;
## [35] &quot;ascii_split_whitespace&quot; &quot;ascii_swapcase&quot;
## [37] &quot;ascii_title&quot; &quot;ascii_trim&quot;
## [39] &quot;ascii_trim_whitespace&quot; &quot;ascii_upper&quot;
## [41] &quot;asin&quot; &quot;asin_checked&quot;
## [43] &quot;assume_timezone&quot; &quot;atan&quot;
## [45] &quot;atan2&quot; &quot;binary_join&quot;
## [47] &quot;binary_join_element_wise&quot; &quot;binary_length&quot;
## [49] &quot;binary_repeat&quot; &quot;binary_replace_slice&quot;
## [51] &quot;binary_reverse&quot; &quot;binary_slice&quot;
## [53] &quot;bit_wise_and&quot; &quot;bit_wise_not&quot;
## [55] &quot;bit_wise_or&quot; &quot;bit_wise_xor&quot;
## [57] &quot;case_when&quot; &quot;cast&quot;
## [59] &quot;ceil&quot; &quot;ceil_temporal&quot;
## [61] &quot;choose&quot; &quot;coalesce&quot;
## [63] &quot;cos&quot; &quot;cos_checked&quot;
## [65] &quot;count&quot; &quot;count_all&quot;
## [67] &quot;count_distinct&quot; &quot;count_substring&quot;
## [69] &quot;count_substring_regex&quot; &quot;cumulative_max&quot;
## [71] &quot;cumulative_mean&quot; &quot;cumulative_min&quot;
## [73] &quot;cumulative_prod&quot; &quot;cumulative_prod_checked&quot;
## [75] &quot;cumulative_sum&quot; &quot;cumulative_sum_checked&quot;
## [77] &quot;day&quot; &quot;day_of_week&quot;
## [79] &quot;day_of_year&quot; &quot;day_time_interval_between&quot;
## [81] &quot;days_between&quot; &quot;dictionary_decode&quot;
## [83] &quot;dictionary_encode&quot; &quot;divide&quot;
## [85] &quot;divide_checked&quot; &quot;drop_null&quot;
## [87] &quot;ends_with&quot; &quot;equal&quot;
## [89] &quot;exp&quot; &quot;extract_regex&quot;
## [91] &quot;fill_null_backward&quot; &quot;fill_null_forward&quot;
## [93] &quot;filter&quot; &quot;find_substring&quot;
## [95] &quot;find_substring_regex&quot; &quot;first&quot;
## [97] &quot;first_last&quot; &quot;floor&quot;
## [99] &quot;floor_temporal&quot; &quot;greater&quot;
## [101] &quot;greater_equal&quot; &quot;hour&quot;
## [103] &quot;hours_between&quot; &quot;if_else&quot;
## [105] &quot;index&quot; &quot;index_in&quot;
## [107] &quot;index_in_meta_binary&quot; &quot;indices_nonzero&quot;
## [109] &quot;invert&quot; &quot;is_dst&quot;
## [111] &quot;is_finite&quot; &quot;is_in&quot;
## [113] &quot;is_in_meta_binary&quot; &quot;is_inf&quot;
## [115] &quot;is_leap_year&quot; &quot;is_nan&quot;
## [117] &quot;is_null&quot; &quot;is_valid&quot;
## [119] &quot;iso_calendar&quot; &quot;iso_week&quot;
## [121] &quot;iso_year&quot; &quot;last&quot;
## [123] &quot;less&quot; &quot;less_equal&quot;
## [125] &quot;list_element&quot; &quot;list_flatten&quot;
## [127] &quot;list_parent_indices&quot; &quot;list_slice&quot;
## [129] &quot;list_value_length&quot; &quot;ln&quot;
## [131] &quot;ln_checked&quot; &quot;local_timestamp&quot;
## [133] &quot;log10&quot; &quot;log10_checked&quot;
## [135] &quot;log1p&quot; &quot;log1p_checked&quot;
## [137] &quot;log2&quot; &quot;log2_checked&quot;
## [139] &quot;logb&quot; &quot;logb_checked&quot;
## [141] &quot;make_struct&quot; &quot;map_lookup&quot;
## [143] &quot;match_like&quot; &quot;match_substring&quot;
## [145] &quot;match_substring_regex&quot; &quot;max&quot;
## [147] &quot;max_element_wise&quot; &quot;mean&quot;
## [149] &quot;microsecond&quot; &quot;microseconds_between&quot;
## [151] &quot;millisecond&quot; &quot;milliseconds_between&quot;
## [153] &quot;min&quot; &quot;min_element_wise&quot;
## [155] &quot;min_max&quot; &quot;minute&quot;
## [157] &quot;minutes_between&quot; &quot;mode&quot;
## [159] &quot;month&quot; &quot;month_day_nano_interval_between&quot;
## [161] &quot;month_interval_between&quot; &quot;multiply&quot;
## [163] &quot;multiply_checked&quot; &quot;nanosecond&quot;
## [165] &quot;nanoseconds_between&quot; &quot;negate&quot;
## [167] &quot;negate_checked&quot; &quot;not_equal&quot;
## [169] &quot;or&quot; &quot;or_kleene&quot;
## [171] &quot;pairwise_diff&quot; &quot;pairwise_diff_checked&quot;
## [173] &quot;partition_nth_indices&quot; &quot;power&quot;
## [175] &quot;power_checked&quot; &quot;product&quot;
## [177] &quot;quantile&quot; &quot;quarter&quot;
## [179] &quot;quarters_between&quot; &quot;random&quot;
## [181] &quot;rank&quot; &quot;replace_substring&quot;
## [183] &quot;replace_substring_regex&quot; &quot;replace_with_mask&quot;
## [185] &quot;round&quot; &quot;round_binary&quot;
## [187] &quot;round_temporal&quot; &quot;round_to_multiple&quot;
## [189] &quot;run_end_decode&quot; &quot;run_end_encode&quot;
## [191] &quot;second&quot; &quot;seconds_between&quot;
## [193] &quot;select_k_unstable&quot; &quot;shift_left&quot;
## [195] &quot;shift_left_checked&quot; &quot;shift_right&quot;
## [197] &quot;shift_right_checked&quot; &quot;sign&quot;
## [199] &quot;sin&quot; &quot;sin_checked&quot;
## [201] &quot;sort_indices&quot; &quot;split_pattern&quot;
## [203] &quot;split_pattern_regex&quot; &quot;sqrt&quot;
## [205] &quot;sqrt_checked&quot; &quot;starts_with&quot;
## [207] &quot;stddev&quot; &quot;strftime&quot;
## [209] &quot;string_is_ascii&quot; &quot;strptime&quot;
## [211] &quot;struct_field&quot; &quot;subsecond&quot;
## [213] &quot;subtract&quot; &quot;subtract_checked&quot;
## [215] &quot;sum&quot; &quot;take&quot;
## [217] &quot;tan&quot; &quot;tan_checked&quot;
## [219] &quot;tdigest&quot; &quot;true_unless_null&quot;
## [221] &quot;trunc&quot; &quot;unique&quot;
## [223] &quot;us_week&quot; &quot;us_year&quot;
## [225] &quot;utf8_capitalize&quot; &quot;utf8_center&quot;
## [227] &quot;utf8_is_alnum&quot; &quot;utf8_is_alpha&quot;
## [229] &quot;utf8_is_decimal&quot; &quot;utf8_is_digit&quot;
## [231] &quot;utf8_is_lower&quot; &quot;utf8_is_numeric&quot;
## [233] &quot;utf8_is_printable&quot; &quot;utf8_is_space&quot;
## [235] &quot;utf8_is_title&quot; &quot;utf8_is_upper&quot;
## [237] &quot;utf8_length&quot; &quot;utf8_lower&quot;
## [239] &quot;utf8_lpad&quot; &quot;utf8_ltrim&quot;
## [241] &quot;utf8_ltrim_whitespace&quot; &quot;utf8_normalize&quot;
## [243] &quot;utf8_replace_slice&quot; &quot;utf8_reverse&quot;
## [245] &quot;utf8_rpad&quot; &quot;utf8_rtrim&quot;
## [247] &quot;utf8_rtrim_whitespace&quot; &quot;utf8_slice_codeunits&quot;
## [249] &quot;utf8_split_whitespace&quot; &quot;utf8_swapcase&quot;
## [251] &quot;utf8_title&quot; &quot;utf8_trim&quot;
## [253] &quot;utf8_trim_whitespace&quot; &quot;utf8_upper&quot;
## [255] &quot;value_counts&quot; &quot;variance&quot;
## [257] &quot;week&quot; &quot;weeks_between&quot;
## [259] &quot;xor&quot; &quot;year&quot;
## [261] &quot;year_month_day&quot; &quot;years_between&quot;</code></pre>
<p>The majority of functions here have been mapped to their base R or tidyverse
equivalent and can be called within a dplyr query as usual. For functions which
don’t have a base R or tidyverse equivalent, or you want to supply custom
options, you can call them by prefixing their name with “arrow_”.</p>
<p>For example, base R’s <code>is.na()</code> function is the equivalent of the Arrow C++
compute function <code>is_null()</code> with the option <code>nan_is_null</code> set to <code>TRUE</code>.
A mapping between these functions (with <code>nan_is_null</code> set to <code>TRUE</code>) has been
created in arrow.</p>
<div class="sourceCode" id="cb118"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb118-1"><a href="manipulating-data---tables.html#cb118-1" tabindex="-1"></a>demo_df <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">x =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="cn">NA</span>, <span class="cn">NaN</span>))</span>
<span id="cb118-2"><a href="manipulating-data---tables.html#cb118-2" tabindex="-1"></a></span>
<span id="cb118-3"><a href="manipulating-data---tables.html#cb118-3" tabindex="-1"></a><span class="fu">arrow_table</span>(demo_df) <span class="sc">%&gt;%</span></span>
<span id="cb118-4"><a href="manipulating-data---tables.html#cb118-4" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">y =</span> <span class="fu">is.na</span>(x)) <span class="sc">%&gt;%</span> </span>
<span id="cb118-5"><a href="manipulating-data---tables.html#cb118-5" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 5 × 2
## x y
## &lt;dbl&gt; &lt;lgl&gt;
## 1 1 FALSE
## 2 2 FALSE
## 3 3 FALSE
## 4 NA TRUE
## 5 NaN TRUE</code></pre>
<p>If you want to call Arrow’s <code>is_null()</code> function but with <code>nan_is_null</code> set to
<code>FALSE</code> (so it returns <code>TRUE</code> when a value being examined is <code>NA</code> but <code>FALSE</code>
when the value being examined is <code>NaN</code>), you must call <code>is_null()</code> directly, as <code>arrow_is_null()</code>, and
specify the option <code>nan_is_null = FALSE</code>.</p>
<div class="sourceCode" id="cb120"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb120-1"><a href="manipulating-data---tables.html#cb120-1" tabindex="-1"></a><span class="fu">arrow_table</span>(demo_df) <span class="sc">%&gt;%</span></span>
<span id="cb120-2"><a href="manipulating-data---tables.html#cb120-2" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">y =</span> <span class="fu">arrow_is_null</span>(x, <span class="at">options =</span> <span class="fu">list</span>(<span class="at">nan_is_null =</span> <span class="cn">FALSE</span>))) <span class="sc">%&gt;%</span> </span>
<span id="cb120-3"><a href="manipulating-data---tables.html#cb120-3" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 5 × 2
## x y
## &lt;dbl&gt; &lt;lgl&gt;
## 1 1 FALSE
## 2 2 FALSE
## 3 3 FALSE
## 4 NA TRUE
## 5 NaN FALSE</code></pre>
<div id="compute-functions-with-options" class="section level4 hasAnchor" number="7.4.2.1">
<h4><span class="header-section-number">7.4.2.1</span> Compute functions with options<a href="manipulating-data---tables.html#compute-functions-with-options" class="anchor-section" aria-label="Anchor link to header"></a></h4>
<p>Although not all Arrow C++ compute functions require options to be specified,
most do. For these functions to work in R, they must be linked up
with the appropriate libarrow options C++ class via the R
package’s C++ code. At the time of writing, all compute functions available in
the development version of the Arrow R package had been associated with their options
classes. However, as the Arrow C++ library’s functionality extends, compute
functions may be added which do not yet have an R binding. If you find a C++
compute function which you wish to use from the R package, please <a href="https://github.com/apache/arrow/issues">open an issue
on the GitHub project</a>.</p>
</div>
</div>
</div>
<div id="compute-window-aggregates" class="section level2 hasAnchor" number="7.5">
<h2><span class="header-section-number">7.5</span> Compute Window Aggregates<a href="manipulating-data---tables.html#compute-window-aggregates" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>You want to apply an aggregation (e.g. <code>mean()</code>) on a grouped table or within a rowwise operation like <code>filter()</code>:</p>
<div id="solution-41" class="section level3 hasAnchor" number="7.5.1">
<h3><span class="header-section-number">7.5.1</span> Solution<a href="manipulating-data---tables.html#solution-41" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div class="sourceCode" id="cb122"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb122-1"><a href="manipulating-data---tables.html#cb122-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb122-2"><a href="manipulating-data---tables.html#cb122-2" tabindex="-1"></a> <span class="fu">select</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb122-3"><a href="manipulating-data---tables.html#cb122-3" tabindex="-1"></a> <span class="fu">filter</span>(<span class="sc">!</span><span class="fu">is.na</span>(hair_color)) <span class="sc">%&gt;%</span></span>
<span id="cb122-4"><a href="manipulating-data---tables.html#cb122-4" tabindex="-1"></a> <span class="fu">left_join</span>(</span>
<span id="cb122-5"><a href="manipulating-data---tables.html#cb122-5" tabindex="-1"></a> <span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb122-6"><a href="manipulating-data---tables.html#cb122-6" tabindex="-1"></a> <span class="fu">group_by</span>(hair_color) <span class="sc">%&gt;%</span></span>
<span id="cb122-7"><a href="manipulating-data---tables.html#cb122-7" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">mean_height =</span> <span class="fu">mean</span>(height, <span class="at">na.rm =</span> <span class="cn">TRUE</span>))</span>
<span id="cb122-8"><a href="manipulating-data---tables.html#cb122-8" tabindex="-1"></a> ) <span class="sc">%&gt;%</span></span>
<span id="cb122-9"><a href="manipulating-data---tables.html#cb122-9" tabindex="-1"></a> <span class="fu">filter</span>(height <span class="sc">&lt;</span> mean_height) <span class="sc">%&gt;%</span></span>
<span id="cb122-10"><a href="manipulating-data---tables.html#cb122-10" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">!</span>mean_height) <span class="sc">%&gt;%</span></span>
<span id="cb122-11"><a href="manipulating-data---tables.html#cb122-11" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 28 × 4
## name height mass hair_color
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt;
## 1 Luke Skywalker 172 77 blond
## 2 Leia Organa 150 49 brown
## 3 Beru Whitesun Lars 165 75 brown
## 4 Wedge Antilles 170 77 brown
## 5 Yoda 66 17 white
## 6 Lobot 175 79 none
## 7 Ackbar 180 83 none
## 8 Wicket Systri Warrick 88 20 brown
## 9 Nien Nunb 160 68 none
## 10 Finis Valorum 170 NA blond
## # ℹ 18 more rows</code></pre>
<p>Or using <code>to_duckdb()</code>:</p>
<div class="sourceCode" id="cb124"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb124-1"><a href="manipulating-data---tables.html#cb124-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb124-2"><a href="manipulating-data---tables.html#cb124-2" tabindex="-1"></a> <span class="fu">select</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb124-3"><a href="manipulating-data---tables.html#cb124-3" tabindex="-1"></a> <span class="fu">filter</span>(<span class="sc">!</span><span class="fu">is.na</span>(hair_color)) <span class="sc">%&gt;%</span></span>
<span id="cb124-4"><a href="manipulating-data---tables.html#cb124-4" tabindex="-1"></a> <span class="fu">to_duckdb</span>() <span class="sc">%&gt;%</span></span>
<span id="cb124-5"><a href="manipulating-data---tables.html#cb124-5" tabindex="-1"></a> <span class="fu">group_by</span>(hair_color) <span class="sc">%&gt;%</span></span>
<span id="cb124-6"><a href="manipulating-data---tables.html#cb124-6" tabindex="-1"></a> <span class="fu">filter</span>(height <span class="sc">&lt;</span> <span class="fu">mean</span>(height, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb124-7"><a href="manipulating-data---tables.html#cb124-7" tabindex="-1"></a> <span class="fu">to_arrow</span>() <span class="sc">%&gt;%</span></span>
<span id="cb124-8"><a href="manipulating-data---tables.html#cb124-8" tabindex="-1"></a> <span class="fu">collect</span>()</span></code></pre></div>
<pre><code>## # A tibble: 28 × 4
## name height mass hair_color
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt;
## 1 Yoda 66 17 white
## 2 Luke Skywalker 172 77 blond
## 3 Finis Valorum 170 NA blond
## 4 R4-P17 96 NA none
## 5 Lobot 175 79 none
## 6 Ackbar 180 83 none
## 7 Nien Nunb 160 68 none
## 8 Darth Maul 175 80 none
## 9 Bib Fortuna 180 NA none
## 10 Ayla Secura 178 55 none
## # ℹ 18 more rows</code></pre>
</div>
<div id="discusson" class="section level3 hasAnchor" number="7.5.2">
<h3><span class="header-section-number">7.5.2</span> Discussion<a href="manipulating-data---tables.html#discusson" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<p>Arrow does not support window functions; when a query uses one, arrow issues a warning and pulls the data into R to evaluate it. For large tables, this sacrifices performance.</p>
<div class="sourceCode" id="cb126"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb126-1"><a href="manipulating-data---tables.html#cb126-1" tabindex="-1"></a><span class="fu">arrow_table</span>(starwars) <span class="sc">%&gt;%</span></span>
<span id="cb126-2"><a href="manipulating-data---tables.html#cb126-2" tabindex="-1"></a> <span class="fu">select</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb126-3"><a href="manipulating-data---tables.html#cb126-3" tabindex="-1"></a> <span class="fu">filter</span>(<span class="sc">!</span><span class="fu">is.na</span>(hair_color)) <span class="sc">%&gt;%</span></span>
<span id="cb126-4"><a href="manipulating-data---tables.html#cb126-4" tabindex="-1"></a> <span class="fu">group_by</span>(hair_color) <span class="sc">%&gt;%</span></span>
<span id="cb126-5"><a href="manipulating-data---tables.html#cb126-5" tabindex="-1"></a> <span class="fu">filter</span>(height <span class="sc">&lt;</span> <span class="fu">mean</span>(height, <span class="at">na.rm =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
<pre><code>## Warning: Expression height &lt; mean(height, na.rm = TRUE) not supported in Arrow;
## pulling data into R</code></pre>
<pre><code>## # A tibble: 28 × 4
## # Groups: hair_color [5]
## name height mass hair_color
## &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt;
## 1 Luke Skywalker 172 77 blond
## 2 Leia Organa 150 49 brown
## 3 Beru Whitesun Lars 165 75 brown
## 4 Wedge Antilles 170 77 brown
## 5 Yoda 66 17 white
## 6 Lobot 175 79 none
## 7 Ackbar 180 83 none
## 8 Wicket Systri Warrick 88 20 brown
## 9 Nien Nunb 160 68 none
## 10 Finis Valorum 170 NA blond
## # ℹ 18 more rows</code></pre>
<p>You can perform these window aggregate operations on Arrow tables by:</p>
<ul>
<li>Computing the aggregation separately, and joining the result</li>
<li>Passing the data to DuckDB, and using the DuckDB query engine to perform the operations</li>
</ul>
<p>Arrow supports zero-copy integration with DuckDB, and DuckDB can query Arrow datasets directly and stream query results back to Arrow. This integration uses zero-copy streaming of data between DuckDB and Arrow and vice versa so that you can compose a query using both together, all the while not paying any cost to (re)serialize the data when you pass it back and forth. This is especially useful in cases where something is supported in one of the Arrow or DuckDB query engines but not the other. You can find more information about this integration in the <a href="https://arrow.apache.org/blog/2021/12/03/arrow-duckdb/">Arrow blog post</a>.</p>
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
</div>
</div>
</div>
</section>
</div>
</div>
</div>
<a href="manipulating-data---arrays.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="using-pyarrow-from-r.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Boot the GitBook reader for this page: each group of settings is built as a
// named local, then all of them are passed to gitbook.start() in one object.
gitbook.require(["gitbook"], function(gitbook) {
  // Returns a fresh { link, text } pair with both fields unset (null).
  var unsetLink = function() {
    return { "link": null, "text": null };
  };

  // Per-network sharing flags, plus the list stored under "all".
  var sharing = {
    "github": false,
    "facebook": true,
    "twitter": true,
    "linkedin": false,
    "weibo": false,
    "instapaper": false,
    "vk": false,
    "whatsapp": false,
    "all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
  };

  // Font settings: theme, family and size.
  var fontsettings = {
    "theme": "white",
    "family": "sans",
    "size": 2
  };

  // "Edit" entry: its link targets this chapter's tables.Rmd on GitHub.
  var edit = {
    "link": "https://github.com/apache/arrow-cookbook/edit/main/r/content/tables.Rmd",
    "text": "Edit"
  };

  // Search uses the "fuse" engine with no extra options.
  var search = {
    "engine": "fuse",
    "options": null
  };

  // Table-of-contents collapse setting.
  var toc = {
    "collapse": "subsection"
  };

  gitbook.start({
    "sharing": sharing,
    "fontsettings": fontsettings,
    "edit": edit,
    "history": unsetLink(),
    "view": unsetLink(),
    "download": null,
    "search": search,
    "toc": toc
  });
});
</script>
</body>
</html>