| <!DOCTYPE html> |
| <html lang="en" xml:lang="en"> |
| <head> |
| |
| <!-- Document metadata: encoding, title/description, social-sharing tags and mobile hints. --> |
| <meta charset="utf-8" /> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge" /> |
| <!-- Title and description; the description content is the same string as the title. --> |
| <title>2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook</title> |
| <meta name="description" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" /> |
| <!-- Records the toolchain that produced this page (bookdown 0.38 and GitBook 2.6.7). --> |
| <meta name="generator" content="bookdown 0.38 and GitBook 2.6.7" /> |
| |
| <!-- Open Graph tags: only title and type are set here. --> |
| <meta property="og:title" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" /> |
| <meta property="og:type" content="book" /> |
| |
| |
| |
| |
| <!-- Twitter card tags: a "summary" card that reuses the page title. --> |
| <meta name="twitter:card" content="summary" /> |
| <meta name="twitter:title" content="2 Reading and Writing Data - Single Files | Apache Arrow R Cookbook" /> |
| |
| |
| |
| |
| |
| |
| |
| <!-- Responsive viewport; zooming is not restricted (no maximum-scale or user-scalable). --> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| <!-- Apple-specific hints for pages launched from the iOS home screen. --> |
| <meta name="apple-mobile-web-app-capable" content="yes" /> |
| <meta name="apple-mobile-web-app-status-bar-style" content="black" /> |
| |
| |
| <!-- Sequential navigation: the previous and next pages of the book. --> |
| <link rel="prev" href="index.html"/> |
| <link rel="next" href="reading-and-writing-data---multiple-files.html"/> |
| <!-- jQuery 3.6.0 is served from the local libs/ directory. --> |
| <script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script> |
| <!-- NOTE(review): fuse.js is the only asset in this block fetched from a third-party CDN (jsDelivr, pinned to 6.4.6). It has no integrity or crossorigin attribute; consider adding Subresource Integrity or vendoring it under libs/. --> |
| <script src="https://cdn.jsdelivr.net/npm/fuse.js@6.4.6/dist/fuse.min.js"></script> |
| <!-- GitBook 2.6.7 base stylesheet followed by its plugin stylesheets. --> |
| <link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" /> |
| <link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" /> |
| |
| |
| |
| |
| |
| |
| |
| |
| <!-- anchor-sections 1.1.0 styles and script; presumably adds link anchors to section headings (confirm against the library). --> |
| <link href="libs/anchor-sections-1.1.0/anchor-sections.css" rel="stylesheet" /> |
| <link href="libs/anchor-sections-1.1.0/anchor-sections-hash.css" rel="stylesheet" /> |
| <script src="libs/anchor-sections-1.1.0/anchor-sections.js"></script> |
| |
| |
| <style type="text/css"> |
| /* Layout and token colors for syntax-highlighted code blocks (elements with class sourceCode). */ |
| /* Base layout: keep whitespace as written and make each code element a positioning context. */ |
| pre > code.sourceCode { white-space: pre; position: relative; } |
| pre > code.sourceCode > span { line-height: 1.25; } |
| pre > code.sourceCode > span:empty { height: 1.2em; } |
| .sourceCode { overflow: visible; } |
| code.sourceCode > span { color: inherit; text-decoration: inherit; } |
| pre.sourceCode { margin: 0; } |
| /* On screen, the wrapping div gets overflow: auto, overriding the overflow: visible set on .sourceCode above. */ |
| @media screen { |
| div.sourceCode { overflow: auto; } |
| } |
| /* In print, long lines wrap; the negative text-indent with equal padding-left gives wrapped lines a hanging indent. */ |
| @media print { |
| pre > code.sourceCode { white-space: pre-wrap; } |
| pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } |
| } |
| /* Line numbering for pre.numberSource: the source-line counter is reset on the code element, */ |
| /* incremented on every line span, and printed in the ::before of each span's first-child <a>. */ |
| pre.numberSource code |
| { counter-reset: source-line 0; } |
| pre.numberSource code > span |
| { position: relative; left: -4em; counter-increment: source-line; } |
| pre.numberSource code > span > a:first-child::before |
| { content: counter(source-line); |
| position: relative; left: -1em; text-align: right; vertical-align: baseline; |
| border: none; display: inline-block; |
| /* Prefixed and unprefixed user-select: none, so the generated numbers are not selectable text. */ |
| -webkit-touch-callout: none; -webkit-user-select: none; |
| -khtml-user-select: none; -moz-user-select: none; |
| -ms-user-select: none; user-select: none; |
| padding: 0 4px; width: 4em; |
| color: #aaaaaa; |
| } |
| pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } |
| /* Empty rule: declares nothing as written. */ |
| div.sourceCode |
| { } |
| /* On screen, underline the ::before content of each line's first-child <a>. */ |
| @media screen { |
| pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } |
| } |
| /* Token colors: one rule per two-letter token class; the trailing comment names the token kind. */ |
| code span.al { color: #ff0000; font-weight: bold; } /* Alert */ |
| code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ |
| code span.at { color: #7d9029; } /* Attribute */ |
| code span.bn { color: #40a070; } /* BaseN */ |
| code span.bu { color: #008000; } /* BuiltIn */ |
| code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ |
| code span.ch { color: #4070a0; } /* Char */ |
| code span.cn { color: #880000; } /* Constant */ |
| code span.co { color: #60a0b0; font-style: italic; } /* Comment */ |
| code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ |
| code span.do { color: #ba2121; font-style: italic; } /* Documentation */ |
| code span.dt { color: #902000; } /* DataType */ |
| code span.dv { color: #40a070; } /* DecVal */ |
| code span.er { color: #ff0000; font-weight: bold; } /* Error */ |
| code span.ex { } /* Extension */ |
| code span.fl { color: #40a070; } /* Float */ |
| code span.fu { color: #06287e; } /* Function */ |
| code span.im { color: #008000; font-weight: bold; } /* Import */ |
| code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ |
| code span.kw { color: #007020; font-weight: bold; } /* Keyword */ |
| code span.op { color: #666666; } /* Operator */ |
| code span.ot { color: #007020; } /* Other */ |
| code span.pp { color: #bc7a00; } /* Preprocessor */ |
| code span.sc { color: #4070a0; } /* SpecialChar */ |
| code span.ss { color: #bb6688; } /* SpecialString */ |
| code span.st { color: #4070a0; } /* String */ |
| code span.va { color: #19177c; } /* Variable */ |
| code span.vs { color: #4070a0; } /* VerbatimString */ |
| code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ |
| </style> |
| |
| <style type="text/css"> |
| /* Hanging indent: the block is pushed right by 1.5em and its first line is pulled back by the same amount. */ |
| div.hanging-indent { |
|   margin-left: 1.5em; |
|   text-indent: -1.5em; |
| } |
| </style> |
| |
| </head> |
| |
| <body> |
| |
| |
| |
| <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath="."> |
| |
| <div class="book-summary"> |
| <nav role="navigation"> |
| |
| <ul class="summary"> |
| <li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Preface</a> |
| <ul> |
| <li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#what-is-arrow"><i class="fa fa-check"></i><b>1.1</b> What is Arrow?</a></li> |
| <li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#alternative-resources"><i class="fa fa-check"></i><b>1.2</b> Alternative resources</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html"><i class="fa fa-check"></i><b>2</b> Reading and Writing Data - Single Files</a> |
| <ul> |
| <li class="chapter" data-level="2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#introduction"><i class="fa fa-check"></i><b>2.1</b> Introduction</a></li> |
| <li class="chapter" data-level="2.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-a-data-frame-to-an-arrow-table"><i class="fa fa-check"></i><b>2.2</b> Convert data from a data frame to an Arrow Table</a> |
| <ul> |
| <li class="chapter" data-level="2.2.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution"><i class="fa fa-check"></i><b>2.2.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.3" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#convert-data-from-an-arrow-table-to-a-data-frame"><i class="fa fa-check"></i><b>2.3</b> Convert data from an Arrow Table to a data frame</a> |
| <ul> |
| <li class="chapter" data-level="2.3.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-1"><i class="fa fa-check"></i><b>2.3.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.3.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion"><i class="fa fa-check"></i><b>2.3.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.4" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-parquet-file"><i class="fa fa-check"></i><b>2.4</b> Write a Parquet file</a> |
| <ul> |
| <li class="chapter" data-level="2.4.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-2"><i class="fa fa-check"></i><b>2.4.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.5" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file"><i class="fa fa-check"></i><b>2.5</b> Read a Parquet file</a> |
| <ul> |
| <li class="chapter" data-level="2.5.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-3"><i class="fa fa-check"></i><b>2.5.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.5.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-1"><i class="fa fa-check"></i><b>2.5.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.6" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-parquet-file-from-s3"><i class="fa fa-check"></i><b>2.6</b> Read a Parquet file from S3</a> |
| <ul> |
| <li class="chapter" data-level="2.6.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-4"><i class="fa fa-check"></i><b>2.6.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.6.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also"><i class="fa fa-check"></i><b>2.6.2</b> See also</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.7" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#filter-columns-while-reading-a-parquet-file"><i class="fa fa-check"></i><b>2.7</b> Filter columns while reading a Parquet file</a> |
| <ul> |
| <li class="chapter" data-level="2.7.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-5"><i class="fa fa-check"></i><b>2.7.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.8" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-feather-v2arrow-ipc-file"><i class="fa fa-check"></i><b>2.8</b> Write a Feather V2/Arrow IPC file</a> |
| <ul> |
| <li class="chapter" data-level="2.8.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-6"><i class="fa fa-check"></i><b>2.8.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.8.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-2"><i class="fa fa-check"></i><b>2.8.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.9" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-featherarrow-ipc-file"><i class="fa fa-check"></i><b>2.9</b> Read a Feather/Arrow IPC file</a> |
| <ul> |
| <li class="chapter" data-level="2.9.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-7"><i class="fa fa-check"></i><b>2.9.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.10" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.10</b> Write streaming Arrow IPC files</a> |
| <ul> |
| <li class="chapter" data-level="2.10.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-8"><i class="fa fa-check"></i><b>2.10.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.11" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-streaming-arrow-ipc-files"><i class="fa fa-check"></i><b>2.11</b> Read streaming Arrow IPC files</a> |
| <ul> |
| <li class="chapter" data-level="2.11.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-9"><i class="fa fa-check"></i><b>2.11.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.12" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-csv-file"><i class="fa fa-check"></i><b>2.12</b> Write a CSV file</a> |
| <ul> |
| <li class="chapter" data-level="2.12.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-10"><i class="fa fa-check"></i><b>2.12.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.13" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-csv-file"><i class="fa fa-check"></i><b>2.13</b> Read a CSV file</a> |
| <ul> |
| <li class="chapter" data-level="2.13.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-11"><i class="fa fa-check"></i><b>2.13.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.14" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-a-json-file"><i class="fa fa-check"></i><b>2.14</b> Read a JSON file</a> |
| <ul> |
| <li class="chapter" data-level="2.14.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-12"><i class="fa fa-check"></i><b>2.14.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.15" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#write-a-compressed-single-data-file"><i class="fa fa-check"></i><b>2.15</b> Write a compressed single data file</a> |
| <ul> |
| <li class="chapter" data-level="2.15.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-13"><i class="fa fa-check"></i><b>2.15.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.15.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#see-also-1"><i class="fa fa-check"></i><b>2.15.2</b> See also</a></li> |
| </ul></li> |
| <li class="chapter" data-level="2.16" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#read-compressed-data"><i class="fa fa-check"></i><b>2.16</b> Read compressed data</a> |
| <ul> |
| <li class="chapter" data-level="2.16.1" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#solution-14"><i class="fa fa-check"></i><b>2.16.1</b> Solution</a></li> |
| <li class="chapter" data-level="2.16.2" data-path="reading-and-writing-data---single-files.html"><a href="reading-and-writing-data---single-files.html#discussion-3"><i class="fa fa-check"></i><b>2.16.2</b> Discussion</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html"><i class="fa fa-check"></i><b>3</b> Reading and Writing Data - Multiple Files</a> |
| <ul> |
| <li class="chapter" data-level="3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#introduction-1"><i class="fa fa-check"></i><b>3.1</b> Introduction</a></li> |
| <li class="chapter" data-level="3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---parquet"><i class="fa fa-check"></i><b>3.2</b> Write data to disk - Parquet</a> |
| <ul> |
| <li class="chapter" data-level="3.2.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-15"><i class="fa fa-check"></i><b>3.2.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.2.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-4"><i class="fa fa-check"></i><b>3.2.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.3" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-partitioned-data---parquet"><i class="fa fa-check"></i><b>3.3</b> Write partitioned data - Parquet</a> |
| <ul> |
| <li class="chapter" data-level="3.3.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-16"><i class="fa fa-check"></i><b>3.3.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.3.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-5"><i class="fa fa-check"></i><b>3.3.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.4" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-partitioned-data"><i class="fa fa-check"></i><b>3.4</b> Read partitioned data</a> |
| <ul> |
| <li class="chapter" data-level="3.4.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-17"><i class="fa fa-check"></i><b>3.4.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.4.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-6"><i class="fa fa-check"></i><b>3.4.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.5" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---featherarrow-ipc-format"><i class="fa fa-check"></i><b>3.5</b> Write data to disk - Feather/Arrow IPC format</a> |
| <ul> |
| <li class="chapter" data-level="3.5.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-18"><i class="fa fa-check"></i><b>3.5.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.6" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-featherarrow-ipc-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.6</b> Read in Feather/Arrow IPC data as an Arrow Dataset</a> |
| <ul> |
| <li class="chapter" data-level="3.6.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-19"><i class="fa fa-check"></i><b>3.6.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.7" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-data-to-disk---csv-format"><i class="fa fa-check"></i><b>3.7</b> Write data to disk - CSV format</a> |
| <ul> |
| <li class="chapter" data-level="3.7.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-20"><i class="fa fa-check"></i><b>3.7.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.8" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-csv-data-as-an-arrow-dataset"><i class="fa fa-check"></i><b>3.8</b> Read in CSV data as an Arrow Dataset</a> |
| <ul> |
| <li class="chapter" data-level="3.8.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-21"><i class="fa fa-check"></i><b>3.8.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.9" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-in-a-csv-dataset-no-headers"><i class="fa fa-check"></i><b>3.9</b> Read in a CSV dataset (no headers)</a> |
| <ul> |
| <li class="chapter" data-level="3.9.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-22"><i class="fa fa-check"></i><b>3.9.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.9.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-7"><i class="fa fa-check"></i><b>3.9.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.10" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#write-compressed-partitioned-data"><i class="fa fa-check"></i><b>3.10</b> Write compressed partitioned data</a> |
| <ul> |
| <li class="chapter" data-level="3.10.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-23"><i class="fa fa-check"></i><b>3.10.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.10.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-8"><i class="fa fa-check"></i><b>3.10.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="3.11" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#read-compressed-data-1"><i class="fa fa-check"></i><b>3.11</b> Read compressed data</a> |
| <ul> |
| <li class="chapter" data-level="3.11.1" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#solution-24"><i class="fa fa-check"></i><b>3.11.1</b> Solution</a></li> |
| <li class="chapter" data-level="3.11.2" data-path="reading-and-writing-data---multiple-files.html"><a href="reading-and-writing-data---multiple-files.html#discussion-9"><i class="fa fa-check"></i><b>3.11.2</b> Discussion</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html"><i class="fa fa-check"></i><b>4</b> Creating Arrow Objects</a> |
| <ul> |
| <li class="chapter" data-level="4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-an-arrow-array-from-an-r-object"><i class="fa fa-check"></i><b>4.1</b> Create an Arrow Array from an R object</a> |
| <ul> |
| <li class="chapter" data-level="4.1.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-25"><i class="fa fa-check"></i><b>4.1.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="4.2" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#create-a-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>4.2</b> Create a Arrow Table from an R object</a> |
| <ul> |
| <li class="chapter" data-level="4.2.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-26"><i class="fa fa-check"></i><b>4.2.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="4.3" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#view-the-contents-of-an-arrow-table-or-recordbatch"><i class="fa fa-check"></i><b>4.3</b> View the contents of an Arrow Table or RecordBatch</a> |
| <ul> |
| <li class="chapter" data-level="4.3.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-27"><i class="fa fa-check"></i><b>4.3.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="4.4" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#manually-create-a-recordbatch-from-an-r-object."><i class="fa fa-check"></i><b>4.4</b> Manually create a RecordBatch from an R object.</a> |
| <ul> |
| <li class="chapter" data-level="4.4.1" data-path="creating-arrow-objects.html"><a href="creating-arrow-objects.html#solution-28"><i class="fa fa-check"></i><b>4.4.1</b> Solution</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="5" data-path="defining-data-types.html"><a href="defining-data-types.html"><i class="fa fa-check"></i><b>5</b> Defining Data Types</a> |
| <ul> |
| <li class="chapter" data-level="5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#introduction-2"><i class="fa fa-check"></i><b>5.1</b> Introduction</a></li> |
| <li class="chapter" data-level="5.2" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-an-existing-arrow-array"><i class="fa fa-check"></i><b>5.2</b> Update data type of an existing Arrow Array</a> |
| <ul> |
| <li class="chapter" data-level="5.2.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-29"><i class="fa fa-check"></i><b>5.2.1</b> Solution</a></li> |
| <li class="chapter" data-level="5.2.2" data-path="defining-data-types.html"><a href="defining-data-types.html#discussion-10"><i class="fa fa-check"></i><b>5.2.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="5.3" data-path="defining-data-types.html"><a href="defining-data-types.html#update-data-type-of-a-field-in-an-existing-arrow-table"><i class="fa fa-check"></i><b>5.3</b> Update data type of a field in an existing Arrow Table</a> |
| <ul> |
| <li class="chapter" data-level="5.3.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-30"><i class="fa fa-check"></i><b>5.3.1</b> Solution</a></li> |
| <li class="chapter" data-level="5.3.2" data-path="defining-data-types.html"><a href="defining-data-types.html#no-compat-type"><i class="fa fa-check"></i><b>5.3.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="5.4" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-creating-an-arrow-table-from-an-r-object"><i class="fa fa-check"></i><b>5.4</b> Specify data types when creating an Arrow table from an R object</a> |
| <ul> |
| <li class="chapter" data-level="5.4.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-31"><i class="fa fa-check"></i><b>5.4.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="5.5" data-path="defining-data-types.html"><a href="defining-data-types.html#specify-data-types-when-reading-in-files"><i class="fa fa-check"></i><b>5.5</b> Specify data types when reading in files</a> |
| <ul> |
| <li class="chapter" data-level="5.5.1" data-path="defining-data-types.html"><a href="defining-data-types.html#solution-32"><i class="fa fa-check"></i><b>5.5.1</b> Solution</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html"><i class="fa fa-check"></i><b>6</b> Manipulating Data - Arrays</a> |
| <ul> |
| <li class="chapter" data-level="6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#introduction-3"><i class="fa fa-check"></i><b>6.1</b> Introduction</a></li> |
| <li class="chapter" data-level="6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#filter-by-values-matching-a-predicate-or-mask"><i class="fa fa-check"></i><b>6.2</b> Filter by values matching a predicate or mask</a> |
| <ul> |
| <li class="chapter" data-level="6.2.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-33"><i class="fa fa-check"></i><b>6.2.1</b> Solution</a></li> |
| <li class="chapter" data-level="6.2.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-11"><i class="fa fa-check"></i><b>6.2.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#compute-meanminmax-etc-value-of-an-array"><i class="fa fa-check"></i><b>6.3</b> Compute Mean/Min/Max, etc value of an Array</a> |
| <ul> |
| <li class="chapter" data-level="6.3.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-34"><i class="fa fa-check"></i><b>6.3.1</b> Solution</a></li> |
| <li class="chapter" data-level="6.3.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-12"><i class="fa fa-check"></i><b>6.3.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="6.4" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#count-occurrences-of-elements-in-an-array"><i class="fa fa-check"></i><b>6.4</b> Count occurrences of elements in an Array</a> |
| <ul> |
| <li class="chapter" data-level="6.4.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-35"><i class="fa fa-check"></i><b>6.4.1</b> Solution</a></li> |
| <li class="chapter" data-level="6.4.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-13"><i class="fa fa-check"></i><b>6.4.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="6.5" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#apply-arithmetic-functions-to-arrays."><i class="fa fa-check"></i><b>6.5</b> Apply arithmetic functions to Arrays.</a> |
| <ul> |
| <li class="chapter" data-level="6.5.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-36"><i class="fa fa-check"></i><b>6.5.1</b> Solution</a></li> |
| <li class="chapter" data-level="6.5.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-14"><i class="fa fa-check"></i><b>6.5.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="6.6" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#call-arrow-compute-functions-directly-on-arrays"><i class="fa fa-check"></i><b>6.6</b> Call Arrow compute functions directly on Arrays</a> |
| <ul> |
| <li class="chapter" data-level="6.6.1" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#solution-37"><i class="fa fa-check"></i><b>6.6.1</b> Solution</a></li> |
| <li class="chapter" data-level="6.6.2" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#discussion-15"><i class="fa fa-check"></i><b>6.6.2</b> Discussion</a></li> |
| <li class="chapter" data-level="6.6.3" data-path="manipulating-data---arrays.html"><a href="manipulating-data---arrays.html#see-also-2"><i class="fa fa-check"></i><b>6.6.3</b> See also</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="7" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html"><i class="fa fa-check"></i><b>7</b> Manipulating Data - Tables</a> |
| <ul> |
| <li class="chapter" data-level="7.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#introduction-4"><i class="fa fa-check"></i><b>7.1</b> Introduction</a></li> |
| <li class="chapter" data-level="7.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.2</b> Use dplyr verbs in Arrow</a> |
| <ul> |
| <li class="chapter" data-level="7.2.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-38"><i class="fa fa-check"></i><b>7.2.1</b> Solution</a></li> |
| <li class="chapter" data-level="7.2.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-16"><i class="fa fa-check"></i><b>7.2.2</b> Discussion</a></li> |
| <li class="chapter" data-level="7.2.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#see-also-3"><i class="fa fa-check"></i><b>7.2.3</b> See also</a></li> |
| </ul></li> |
| <li class="chapter" data-level="7.3" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-r-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.3</b> Use R functions in dplyr verbs in Arrow</a> |
| <ul> |
| <li class="chapter" data-level="7.3.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-39"><i class="fa fa-check"></i><b>7.3.1</b> Solution</a></li> |
| <li class="chapter" data-level="7.3.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-17"><i class="fa fa-check"></i><b>7.3.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="7.4" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#use-arrow-functions-in-dplyr-verbs-in-arrow"><i class="fa fa-check"></i><b>7.4</b> Use Arrow functions in dplyr verbs in Arrow</a> |
| <ul> |
| <li class="chapter" data-level="7.4.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-40"><i class="fa fa-check"></i><b>7.4.1</b> Solution</a></li> |
| <li class="chapter" data-level="7.4.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discussion-18"><i class="fa fa-check"></i><b>7.4.2</b> Discussion</a></li> |
| </ul></li> |
| <li class="chapter" data-level="7.5" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#compute-window-aggregates"><i class="fa fa-check"></i><b>7.5</b> Compute Window Aggregates</a> |
| <ul> |
| <li class="chapter" data-level="7.5.1" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#solution-41"><i class="fa fa-check"></i><b>7.5.1</b> Solution</a></li> |
| <li class="chapter" data-level="7.5.2" data-path="manipulating-data---tables.html"><a href="manipulating-data---tables.html#discusson"><i class="fa fa-check"></i><b>7.5.2</b> Discussion</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="8" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html"><i class="fa fa-check"></i><b>8</b> Using PyArrow from R</a> |
| <ul> |
| <li class="chapter" data-level="8.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#introduction-5"><i class="fa fa-check"></i><b>8.1</b> Introduction</a></li> |
| <li class="chapter" data-level="8.2" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#create-an-arrow-object-using-pyarrow-in-r"><i class="fa fa-check"></i><b>8.2</b> Create an Arrow object using PyArrow in R</a> |
| <ul> |
| <li class="chapter" data-level="8.2.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-42"><i class="fa fa-check"></i><b>8.2.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="8.3" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#call-a-pyarrow-function-from-r"><i class="fa fa-check"></i><b>8.3</b> Call a PyArrow function from R</a> |
| <ul> |
| <li class="chapter" data-level="8.3.1" data-path="using-pyarrow-from-r.html"><a href="using-pyarrow-from-r.html#solution-43"><i class="fa fa-check"></i><b>8.3.1</b> Solution</a></li> |
| </ul></li> |
| </ul></li> |
| <li class="chapter" data-level="9" data-path="flight.html"><a href="flight.html"><i class="fa fa-check"></i><b>9</b> Flight</a> |
| <ul> |
| <li class="chapter" data-level="9.1" data-path="flight.html"><a href="flight.html#introduction-6"><i class="fa fa-check"></i><b>9.1</b> Introduction</a></li> |
| <li class="chapter" data-level="9.2" data-path="flight.html"><a href="flight.html#connect-to-a-flight-server"><i class="fa fa-check"></i><b>9.2</b> Connect to a Flight server</a> |
| <ul> |
| <li class="chapter" data-level="9.2.1" data-path="flight.html"><a href="flight.html#solution-44"><i class="fa fa-check"></i><b>9.2.1</b> Solution</a></li> |
| <li class="chapter" data-level="9.2.2" data-path="flight.html"><a href="flight.html#see-also-4"><i class="fa fa-check"></i><b>9.2.2</b> See also</a></li> |
| </ul></li> |
| <li class="chapter" data-level="9.3" data-path="flight.html"><a href="flight.html#send-data-to-a-flight-server"><i class="fa fa-check"></i><b>9.3</b> Send data to a Flight server</a> |
| <ul> |
| <li class="chapter" data-level="9.3.1" data-path="flight.html"><a href="flight.html#solution-45"><i class="fa fa-check"></i><b>9.3.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="9.4" data-path="flight.html"><a href="flight.html#check-what-resources-exist-on-a-flight-server"><i class="fa fa-check"></i><b>9.4</b> Check what resources exist on a Flight server</a> |
| <ul> |
| <li class="chapter" data-level="9.4.1" data-path="flight.html"><a href="flight.html#solution-46"><i class="fa fa-check"></i><b>9.4.1</b> Solution</a></li> |
| </ul></li> |
| <li class="chapter" data-level="9.5" data-path="flight.html"><a href="flight.html#retrieve-data-from-a-flight-server"><i class="fa fa-check"></i><b>9.5</b> Retrieve data from a Flight server</a> |
| <ul> |
| <li class="chapter" data-level="9.5.1" data-path="flight.html"><a href="flight.html#solution-47"><i class="fa fa-check"></i><b>9.5.1</b> Solution</a></li> |
| </ul></li> |
| </ul></li> |
| </ul> |
| |
| </nav> |
| </div> |
| |
| <div class="book-body"> |
| <div class="body-inner"> |
| <div class="book-header" role="navigation"> |
| <h1> |
| <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Apache Arrow R Cookbook</a> |
| </h1> |
| </div> |
| |
| <div class="page-wrapper" tabindex="-1" role="main"> |
| <div class="page-inner"> |
| |
| <section class="normal" id="section-"> |
| <div id="reading-and-writing-data---single-files" class="section level1 hasAnchor" number="2"> |
| <h1><span class="header-section-number">2</span> Reading and Writing Data - Single Files<a href="reading-and-writing-data---single-files.html#reading-and-writing-data---single-files" class="anchor-section" aria-label="Anchor link to header"></a></h1> |
| <div id="introduction" class="section level2 hasAnchor" number="2.1"> |
| <h2><span class="header-section-number">2.1</span> Introduction<a href="reading-and-writing-data---single-files.html#introduction" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>When reading files into R using Apache Arrow, you can read:</p> |
| <ul> |
| <li>a single file into memory as a data frame or an Arrow Table</li> |
| <li>a single file that is too large to fit in memory as an Arrow Dataset</li> |
| <li>multiple and partitioned files as an Arrow Dataset</li> |
| </ul> |
| <p>This chapter contains recipes related to using Apache Arrow to read and |
| write single file data into memory as an Arrow Table. There are a number of circumstances in |
| which you may want to read in single file data as an Arrow Table:</p> |
| <ul> |
| <li>your data file is large and you are having performance issues</li> |
| <li>you want faster performance from your <code>dplyr</code> queries</li> |
| <li>you want to be able to take advantage of Arrow’s compute functions</li> |
| </ul> |
| <p>If a single data file is too large to load into memory, you can use the Arrow Dataset API. |
| Recipes for using <code>open_dataset()</code> and <code>write_dataset()</code> are in the Reading and Writing Data - Multiple Files |
| chapter.</p> |
| </div> |
| <div id="convert-data-from-a-data-frame-to-an-arrow-table" class="section level2 hasAnchor" number="2.2"> |
| <h2><span class="header-section-number">2.2</span> Convert data from a data frame to an Arrow Table<a href="reading-and-writing-data---single-files.html#convert-data-from-a-data-frame-to-an-arrow-table" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to convert an existing <code>data.frame</code> or <code>tibble</code> object into an Arrow Table.</p> |
| <div id="solution" class="section level3 hasAnchor" number="2.2.1"> |
| <h3><span class="header-section-number">2.2.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="reading-and-writing-data---single-files.html#cb1-1" tabindex="-1"></a>air_table <span class="ot"><-</span> <span class="fu">arrow_table</span>(airquality)</span> |
| <span id="cb1-2"><a href="reading-and-writing-data---single-files.html#cb1-2" tabindex="-1"></a>air_table</span></code></pre></div> |
| <pre><code>## Table |
| ## 153 rows x 6 columns |
| ## $Ozone &lt;int32&gt; |
| ## $Solar.R &lt;int32&gt; |
| ## $Wind &lt;double&gt; |
| ## $Temp &lt;int32&gt; |
| ## $Month &lt;int32&gt; |
| ## $Day &lt;int32&gt; |
| ## |
| ## See $metadata for additional Schema metadata</code></pre> |
| </div> |
| </div> |
| <div id="convert-data-from-an-arrow-table-to-a-data-frame" class="section level2 hasAnchor" number="2.3"> |
| <h2><span class="header-section-number">2.3</span> Convert data from an Arrow Table to a data frame<a href="reading-and-writing-data---single-files.html#convert-data-from-an-arrow-table-to-a-data-frame" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to convert an Arrow Table to a data frame to view the data or work with it |
| in your usual analytics pipeline.</p> |
| <div id="solution-1" class="section level3 hasAnchor" number="2.3.1"> |
| <h3><span class="header-section-number">2.3.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-1" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="reading-and-writing-data---single-files.html#cb3-1" tabindex="-1"></a>air_df <span class="ot"><-</span> <span class="fu">as.data.frame</span>(air_table)</span> |
| <span id="cb3-2"><a href="reading-and-writing-data---single-files.html#cb3-2" tabindex="-1"></a>air_df</span></code></pre></div> |
| <pre><code>## Ozone Solar.R Wind Temp Month Day |
| ## 1 41 190 7.4 67 5 1 |
| ## 2 36 118 8.0 72 5 2 |
| ## 3 12 149 12.6 74 5 3 |
| ## 4 18 313 11.5 62 5 4 |
| ## 5 NA NA 14.3 56 5 5 |
| ## 6 28 NA 14.9 66 5 6 |
| ## 7 23 299 8.6 65 5 7 |
| ## 8 19 99 13.8 59 5 8 |
| ## 9 8 19 20.1 61 5 9 |
| ## 10 NA 194 8.6 69 5 10 |
| ## 11 7 NA 6.9 74 5 11 |
| ## 12 16 256 9.7 69 5 12 |
| ## 13 11 290 9.2 66 5 13 |
| ## 14 14 274 10.9 68 5 14 |
| ## 15 18 65 13.2 58 5 15 |
| ## 16 14 334 11.5 64 5 16 |
| ## 17 34 307 12.0 66 5 17 |
| ## 18 6 78 18.4 57 5 18 |
| ## 19 30 322 11.5 68 5 19 |
| ## 20 11 44 9.7 62 5 20 |
| ## 21 1 8 9.7 59 5 21 |
| ## 22 11 320 16.6 73 5 22 |
| ## 23 4 25 9.7 61 5 23 |
| ## 24 32 92 12.0 61 5 24 |
| ## 25 NA 66 16.6 57 5 25 |
| ## 26 NA 266 14.9 58 5 26 |
| ## 27 NA NA 8.0 57 5 27 |
| ## 28 23 13 12.0 67 5 28 |
| ## 29 45 252 14.9 81 5 29 |
| ## 30 115 223 5.7 79 5 30 |
| ## 31 37 279 7.4 76 5 31 |
| ## 32 NA 286 8.6 78 6 1 |
| ## 33 NA 287 9.7 74 6 2 |
| ## 34 NA 242 16.1 67 6 3 |
| ## 35 NA 186 9.2 84 6 4 |
| ## 36 NA 220 8.6 85 6 5 |
| ## 37 NA 264 14.3 79 6 6 |
| ## 38 29 127 9.7 82 6 7 |
| ## 39 NA 273 6.9 87 6 8 |
| ## 40 71 291 13.8 90 6 9 |
| ## 41 39 323 11.5 87 6 10 |
| ## 42 NA 259 10.9 93 6 11 |
| ## 43 NA 250 9.2 92 6 12 |
| ## 44 23 148 8.0 82 6 13 |
| ## 45 NA 332 13.8 80 6 14 |
| ## 46 NA 322 11.5 79 6 15 |
| ## 47 21 191 14.9 77 6 16 |
| ## 48 37 284 20.7 72 6 17 |
| ## 49 20 37 9.2 65 6 18 |
| ## 50 12 120 11.5 73 6 19 |
| ## 51 13 137 10.3 76 6 20 |
| ## 52 NA 150 6.3 77 6 21 |
| ## 53 NA 59 1.7 76 6 22 |
| ## 54 NA 91 4.6 76 6 23 |
| ## 55 NA 250 6.3 76 6 24 |
| ## 56 NA 135 8.0 75 6 25 |
| ## 57 NA 127 8.0 78 6 26 |
| ## 58 NA 47 10.3 73 6 27 |
| ## 59 NA 98 11.5 80 6 28 |
| ## 60 NA 31 14.9 77 6 29 |
| ## 61 NA 138 8.0 83 6 30 |
| ## 62 135 269 4.1 84 7 1 |
| ## 63 49 248 9.2 85 7 2 |
| ## 64 32 236 9.2 81 7 3 |
| ## 65 NA 101 10.9 84 7 4 |
| ## 66 64 175 4.6 83 7 5 |
| ## 67 40 314 10.9 83 7 6 |
| ## 68 77 276 5.1 88 7 7 |
| ## 69 97 267 6.3 92 7 8 |
| ## 70 97 272 5.7 92 7 9 |
| ## 71 85 175 7.4 89 7 10 |
| ## 72 NA 139 8.6 82 7 11 |
| ## 73 10 264 14.3 73 7 12 |
| ## 74 27 175 14.9 81 7 13 |
| ## 75 NA 291 14.9 91 7 14 |
| ## 76 7 48 14.3 80 7 15 |
| ## 77 48 260 6.9 81 7 16 |
| ## 78 35 274 10.3 82 7 17 |
| ## 79 61 285 6.3 84 7 18 |
| ## 80 79 187 5.1 87 7 19 |
| ## 81 63 220 11.5 85 7 20 |
| ## 82 16 7 6.9 74 7 21 |
| ## 83 NA 258 9.7 81 7 22 |
| ## 84 NA 295 11.5 82 7 23 |
| ## 85 80 294 8.6 86 7 24 |
| ## 86 108 223 8.0 85 7 25 |
| ## 87 20 81 8.6 82 7 26 |
| ## 88 52 82 12.0 86 7 27 |
| ## 89 82 213 7.4 88 7 28 |
| ## 90 50 275 7.4 86 7 29 |
| ## 91 64 253 7.4 83 7 30 |
| ## 92 59 254 9.2 81 7 31 |
| ## 93 39 83 6.9 81 8 1 |
| ## 94 9 24 13.8 81 8 2 |
| ## 95 16 77 7.4 82 8 3 |
| ## 96 78 NA 6.9 86 8 4 |
| ## 97 35 NA 7.4 85 8 5 |
| ## 98 66 NA 4.6 87 8 6 |
| ## 99 122 255 4.0 89 8 7 |
| ## 100 89 229 10.3 90 8 8 |
| ## 101 110 207 8.0 90 8 9 |
| ## 102 NA 222 8.6 92 8 10 |
| ## 103 NA 137 11.5 86 8 11 |
| ## 104 44 192 11.5 86 8 12 |
| ## 105 28 273 11.5 82 8 13 |
| ## 106 65 157 9.7 80 8 14 |
| ## 107 NA 64 11.5 79 8 15 |
| ## 108 22 71 10.3 77 8 16 |
| ## 109 59 51 6.3 79 8 17 |
| ## 110 23 115 7.4 76 8 18 |
| ## 111 31 244 10.9 78 8 19 |
| ## 112 44 190 10.3 78 8 20 |
| ## 113 21 259 15.5 77 8 21 |
| ## 114 9 36 14.3 72 8 22 |
| ## 115 NA 255 12.6 75 8 23 |
| ## 116 45 212 9.7 79 8 24 |
| ## 117 168 238 3.4 81 8 25 |
| ## 118 73 215 8.0 86 8 26 |
| ## 119 NA 153 5.7 88 8 27 |
| ## 120 76 203 9.7 97 8 28 |
| ## 121 118 225 2.3 94 8 29 |
| ## 122 84 237 6.3 96 8 30 |
| ## 123 85 188 6.3 94 8 31 |
| ## 124 96 167 6.9 91 9 1 |
| ## 125 78 197 5.1 92 9 2 |
| ## 126 73 183 2.8 93 9 3 |
| ## 127 91 189 4.6 93 9 4 |
| ## 128 47 95 7.4 87 9 5 |
| ## 129 32 92 15.5 84 9 6 |
| ## 130 20 252 10.9 80 9 7 |
| ## 131 23 220 10.3 78 9 8 |
| ## 132 21 230 10.9 75 9 9 |
| ## 133 24 259 9.7 73 9 10 |
| ## 134 44 236 14.9 81 9 11 |
| ## 135 21 259 15.5 76 9 12 |
| ## 136 28 238 6.3 77 9 13 |
| ## 137 9 24 10.9 71 9 14 |
| ## 138 13 112 11.5 71 9 15 |
| ## 139 46 237 6.9 78 9 16 |
| ## 140 18 224 13.8 67 9 17 |
| ## 141 13 27 10.3 76 9 18 |
| ## 142 24 238 10.3 68 9 19 |
| ## 143 16 201 8.0 82 9 20 |
| ## 144 13 238 12.6 64 9 21 |
| ## 145 23 14 9.2 71 9 22 |
| ## 146 36 139 10.3 81 9 23 |
| ## 147 7 49 10.3 69 9 24 |
| ## 148 14 20 16.6 63 9 25 |
| ## 149 30 193 6.9 70 9 26 |
| ## 150 NA 145 13.2 77 9 27 |
| ## 151 14 191 14.3 75 9 28 |
| ## 152 18 131 8.0 76 9 29 |
| ## 153 20 223 11.5 68 9 30</code></pre> |
| </div> |
| <div id="discussion" class="section level3 hasAnchor" number="2.3.2"> |
| <h3><span class="header-section-number">2.3.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>You can use <code>dplyr::collect()</code> to return a tibble or <code>as.data.frame()</code> to return a <code>data.frame</code>.</p> |
| </div> |
| </div> |
| <div id="write-a-parquet-file" class="section level2 hasAnchor" number="2.4"> |
| <h2><span class="header-section-number">2.4</span> Write a Parquet file<a href="reading-and-writing-data---single-files.html#write-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to write a single Parquet file to disk.</p> |
| <div id="solution-2" class="section level3 hasAnchor" number="2.4.1"> |
| <h3><span class="header-section-number">2.4.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-2" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="reading-and-writing-data---single-files.html#cb5-1" tabindex="-1"></a><span class="co"># Create table</span></span> |
| <span id="cb5-2"><a href="reading-and-writing-data---single-files.html#cb5-2" tabindex="-1"></a>my_table <span class="ot"><-</span> <span class="fu">arrow_table</span>(tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">"A"</span>, <span class="st">"B"</span>, <span class="st">"C"</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span> |
| <span id="cb5-3"><a href="reading-and-writing-data---single-files.html#cb5-3" tabindex="-1"></a><span class="co"># Write to Parquet</span></span> |
| <span id="cb5-4"><a href="reading-and-writing-data---single-files.html#cb5-4" tabindex="-1"></a><span class="fu">write_parquet</span>(my_table, <span class="st">"my_table.parquet"</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="read-a-parquet-file" class="section level2 hasAnchor" number="2.5"> |
| <h2><span class="header-section-number">2.5</span> Read a Parquet file<a href="reading-and-writing-data---single-files.html#read-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read a single Parquet file into memory.</p> |
| <div id="solution-3" class="section level3 hasAnchor" number="2.5.1"> |
| <h3><span class="header-section-number">2.5.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-3" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="reading-and-writing-data---single-files.html#cb6-1" tabindex="-1"></a>parquet_tbl <span class="ot"><-</span> <span class="fu">read_parquet</span>(<span class="st">"my_table.parquet"</span>)</span> |
| <span id="cb6-2"><a href="reading-and-writing-data---single-files.html#cb6-2" tabindex="-1"></a>parquet_tbl</span></code></pre></div> |
| <pre><code>## # A tibble: 3 × 2 |
| ## group score |
| ## &lt;chr&gt; &lt;dbl&gt; |
| ## 1 A 99 |
| ## 2 B 97 |
| ## 3 C 99</code></pre> |
| <p>As the argument <code>as_data_frame</code> was left set to its default value of <code>TRUE</code>, the file was read in as a tibble.</p> |
| <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="reading-and-writing-data---single-files.html#cb8-1" tabindex="-1"></a><span class="fu">class</span>(parquet_tbl)</span></code></pre></div> |
| <pre><code>## [1] "tbl_df" "tbl" "data.frame"</code></pre> |
| </div> |
| <div id="discussion-1" class="section level3 hasAnchor" number="2.5.2"> |
| <h3><span class="header-section-number">2.5.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-1" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>If you set <code>as_data_frame</code> to <code>FALSE</code>, the file will be read in as an Arrow Table.</p> |
| <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="reading-and-writing-data---single-files.html#cb10-1" tabindex="-1"></a>my_table_arrow <span class="ot"><-</span> <span class="fu">read_parquet</span>(<span class="st">"my_table.parquet"</span>, <span class="at">as_data_frame =</span> <span class="cn">FALSE</span>)</span> |
| <span id="cb10-2"><a href="reading-and-writing-data---single-files.html#cb10-2" tabindex="-1"></a>my_table_arrow</span></code></pre></div> |
| <pre><code>## Table |
| ## 3 rows x 2 columns |
| ## $group &lt;string&gt; |
| ## $score &lt;double&gt;</code></pre> |
| <div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="reading-and-writing-data---single-files.html#cb12-1" tabindex="-1"></a><span class="fu">class</span>(my_table_arrow)</span></code></pre></div> |
| <pre><code>## [1] "Table" "ArrowTabular" "ArrowObject" "R6"</code></pre> |
| </div> |
| </div> |
| <div id="read-a-parquet-file-from-s3" class="section level2 hasAnchor" number="2.6"> |
| <h2><span class="header-section-number">2.6</span> Read a Parquet file from S3<a href="reading-and-writing-data---single-files.html#read-a-parquet-file-from-s3" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read a single Parquet file from S3 into memory.</p> |
| <div id="solution-4" class="section level3 hasAnchor" number="2.6.1"> |
| <h3><span class="header-section-number">2.6.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-4" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="reading-and-writing-data---single-files.html#cb14-1" tabindex="-1"></a>df <span class="ot"><-</span> <span class="fu">read_parquet</span>(<span class="at">file =</span> <span class="st">"s3://voltrondata-labs-datasets/nyc-taxi/year=2019/month=6/part-0.parquet"</span>)</span></code></pre></div> |
| </div> |
| <div id="see-also" class="section level3 hasAnchor" number="2.6.2"> |
| <h3><span class="header-section-number">2.6.2</span> See also<a href="reading-and-writing-data---single-files.html#see-also" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>For more in-depth instructions, including how to work with S3 buckets which require authentication, you can find a guide to reading and writing to/from S3 buckets here: <a href="https://arrow.apache.org/docs/r/articles/fs.html" class="uri">https://arrow.apache.org/docs/r/articles/fs.html</a>.</p> |
| </div> |
| </div> |
| <div id="filter-columns-while-reading-a-parquet-file" class="section level2 hasAnchor" number="2.7"> |
| <h2><span class="header-section-number">2.7</span> Filter columns while reading a Parquet file<a href="reading-and-writing-data---single-files.html#filter-columns-while-reading-a-parquet-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to specify which columns to include when reading a single Parquet file into memory.</p> |
| <div id="solution-5" class="section level3 hasAnchor" number="2.7.1"> |
| <h3><span class="header-section-number">2.7.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-5" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="reading-and-writing-data---single-files.html#cb15-1" tabindex="-1"></a><span class="co"># Create table to read back in</span></span> |
| <span id="cb15-2"><a href="reading-and-writing-data---single-files.html#cb15-2" tabindex="-1"></a>dist_time <span class="ot"><-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">distance =</span> <span class="fu">c</span>(<span class="fl">12.2</span>, <span class="fl">15.7</span>, <span class="fl">14.2</span>), <span class="at">time =</span> <span class="fu">c</span>(<span class="dv">43</span>, <span class="dv">44</span>, <span class="dv">40</span>)))</span> |
| <span id="cb15-3"><a href="reading-and-writing-data---single-files.html#cb15-3" tabindex="-1"></a><span class="co"># Write to Parquet</span></span> |
| <span id="cb15-4"><a href="reading-and-writing-data---single-files.html#cb15-4" tabindex="-1"></a><span class="fu">write_parquet</span>(dist_time, <span class="st">"dist_time.parquet"</span>)</span> |
| <span id="cb15-5"><a href="reading-and-writing-data---single-files.html#cb15-5" tabindex="-1"></a></span> |
| <span id="cb15-6"><a href="reading-and-writing-data---single-files.html#cb15-6" tabindex="-1"></a><span class="co"># Read in only the "time" column</span></span> |
| <span id="cb15-7"><a href="reading-and-writing-data---single-files.html#cb15-7" tabindex="-1"></a>time_only <span class="ot"><-</span> <span class="fu">read_parquet</span>(<span class="st">"dist_time.parquet"</span>, <span class="at">col_select =</span> <span class="st">"time"</span>)</span> |
| <span id="cb15-8"><a href="reading-and-writing-data---single-files.html#cb15-8" tabindex="-1"></a>time_only</span></code></pre></div> |
| <pre><code>## # A tibble: 3 × 1 |
| ## time |
| ## &lt;dbl&gt; |
| ## 1 43 |
| ## 2 44 |
| ## 3 40</code></pre> |
| </div> |
| </div> |
| <div id="write-a-feather-v2arrow-ipc-file" class="section level2 hasAnchor" number="2.8"> |
| <h2><span class="header-section-number">2.8</span> Write a Feather V2/Arrow IPC file<a href="reading-and-writing-data---single-files.html#write-a-feather-v2arrow-ipc-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to write a single Feather V2 file (also called an Arrow IPC file).</p> |
| <div id="solution-6" class="section level3 hasAnchor" number="2.8.1"> |
| <h3><span class="header-section-number">2.8.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-6" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="reading-and-writing-data---single-files.html#cb17-1" tabindex="-1"></a>my_table <span class="ot"><-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">"A"</span>, <span class="st">"B"</span>, <span class="st">"C"</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span> |
| <span id="cb17-2"><a href="reading-and-writing-data---single-files.html#cb17-2" tabindex="-1"></a><span class="fu">write_feather</span>(my_table, <span class="st">"my_table.arrow"</span>)</span></code></pre></div> |
| </div> |
| <div id="discussion-2" class="section level3 hasAnchor" number="2.8.2"> |
| <h3><span class="header-section-number">2.8.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-2" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>For legacy support, you can write data in the original Feather format by setting the <code>version</code> parameter to <code>1</code>.</p> |
| <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="reading-and-writing-data---single-files.html#cb18-1" tabindex="-1"></a><span class="co"># Create table</span></span> |
| <span id="cb18-2"><a href="reading-and-writing-data---single-files.html#cb18-2" tabindex="-1"></a>my_table <span class="ot"><-</span> <span class="fu">arrow_table</span>(<span class="fu">data.frame</span>(<span class="at">group =</span> <span class="fu">c</span>(<span class="st">"A"</span>, <span class="st">"B"</span>, <span class="st">"C"</span>), <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)))</span> |
| <span id="cb18-3"><a href="reading-and-writing-data---single-files.html#cb18-3" tabindex="-1"></a><span class="co"># Write to Feather format V1</span></span> |
| <span id="cb18-4"><a href="reading-and-writing-data---single-files.html#cb18-4" tabindex="-1"></a><span class="fu">write_feather</span>(my_table, <span class="st">"my_table.feather"</span>, <span class="at">version =</span> <span class="dv">1</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="read-a-featherarrow-ipc-file" class="section level2 hasAnchor" number="2.9"> |
| <h2><span class="header-section-number">2.9</span> Read a Feather/Arrow IPC file<a href="reading-and-writing-data---single-files.html#read-a-featherarrow-ipc-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read a single Feather V1 or V2 file (also called an Arrow IPC file) into memory.</p> |
| <div id="solution-7" class="section level3 hasAnchor" number="2.9.1"> |
| <h3><span class="header-section-number">2.9.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-7" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="reading-and-writing-data---single-files.html#cb19-1" tabindex="-1"></a>my_feather_tbl <span class="ot"><-</span> <span class="fu">read_feather</span>(<span class="st">"my_table.arrow"</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="write-streaming-arrow-ipc-files" class="section level2 hasAnchor" number="2.10"> |
| <h2><span class="header-section-number">2.10</span> Write streaming Arrow IPC files<a href="reading-and-writing-data---single-files.html#write-streaming-arrow-ipc-files" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to write to the Arrow IPC stream format.</p> |
| <div id="solution-8" class="section level3 hasAnchor" number="2.10.1"> |
| <h3><span class="header-section-number">2.10.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-8" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="reading-and-writing-data---single-files.html#cb20-1" tabindex="-1"></a><span class="co"># Create table</span></span> |
| <span id="cb20-2"><a href="reading-and-writing-data---single-files.html#cb20-2" tabindex="-1"></a>my_table <span class="ot"><-</span> <span class="fu">arrow_table</span>(</span> |
| <span id="cb20-3"><a href="reading-and-writing-data---single-files.html#cb20-3" tabindex="-1"></a> <span class="fu">data.frame</span>(</span> |
| <span id="cb20-4"><a href="reading-and-writing-data---single-files.html#cb20-4" tabindex="-1"></a> <span class="at">group =</span> <span class="fu">c</span>(<span class="st">"A"</span>, <span class="st">"B"</span>, <span class="st">"C"</span>),</span> |
| <span id="cb20-5"><a href="reading-and-writing-data---single-files.html#cb20-5" tabindex="-1"></a> <span class="at">score =</span> <span class="fu">c</span>(<span class="dv">99</span>, <span class="dv">97</span>, <span class="dv">99</span>)</span> |
| <span id="cb20-6"><a href="reading-and-writing-data---single-files.html#cb20-6" tabindex="-1"></a> )</span> |
| <span id="cb20-7"><a href="reading-and-writing-data---single-files.html#cb20-7" tabindex="-1"></a>)</span> |
| <span id="cb20-8"><a href="reading-and-writing-data---single-files.html#cb20-8" tabindex="-1"></a><span class="co"># Write to IPC stream format</span></span> |
| <span id="cb20-9"><a href="reading-and-writing-data---single-files.html#cb20-9" tabindex="-1"></a><span class="fu">write_ipc_stream</span>(my_table, <span class="st">"my_table.arrows"</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="read-streaming-arrow-ipc-files" class="section level2 hasAnchor" number="2.11"> |
| <h2><span class="header-section-number">2.11</span> Read streaming Arrow IPC files<a href="reading-and-writing-data---single-files.html#read-streaming-arrow-ipc-files" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read from the Arrow IPC stream format.</p> |
| <div id="solution-9" class="section level3 hasAnchor" number="2.11.1"> |
| <h3><span class="header-section-number">2.11.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-9" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="reading-and-writing-data---single-files.html#cb21-1" tabindex="-1"></a>my_ipc_stream <span class="ot"><-</span> arrow<span class="sc">::</span><span class="fu">read_ipc_stream</span>(<span class="st">"my_table.arrows"</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="write-a-csv-file" class="section level2 hasAnchor" number="2.12"> |
| <h2><span class="header-section-number">2.12</span> Write a CSV file<a href="reading-and-writing-data---single-files.html#write-a-csv-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to write Arrow data to a single CSV file.</p> |
| <div id="solution-10" class="section level3 hasAnchor" number="2.12.1"> |
| <h3><span class="header-section-number">2.12.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-10" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="reading-and-writing-data---single-files.html#cb22-1" tabindex="-1"></a><span class="fu">write_csv_arrow</span>(cars, <span class="st">"cars.csv"</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="read-a-csv-file" class="section level2 hasAnchor" number="2.13"> |
| <h2><span class="header-section-number">2.13</span> Read a CSV file<a href="reading-and-writing-data---single-files.html#read-a-csv-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read a single CSV file into memory.</p> |
| <div id="solution-11" class="section level3 hasAnchor" number="2.13.1"> |
| <h3><span class="header-section-number">2.13.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-11" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="reading-and-writing-data---single-files.html#cb23-1" tabindex="-1"></a>my_csv <span class="ot"><-</span> <span class="fu">read_csv_arrow</span>(<span class="st">"cars.csv"</span>, <span class="at">as_data_frame =</span> <span class="cn">FALSE</span>)</span></code></pre></div> |
| </div> |
| </div> |
| <div id="read-a-json-file" class="section level2 hasAnchor" number="2.14"> |
| <h2><span class="header-section-number">2.14</span> Read a JSON file<a href="reading-and-writing-data---single-files.html#read-a-json-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read a JSON file into memory.</p> |
| <div id="solution-12" class="section level3 hasAnchor" number="2.14.1"> |
| <h3><span class="header-section-number">2.14.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-12" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="reading-and-writing-data---single-files.html#cb24-1" tabindex="-1"></a><span class="co"># Create a file to read back in</span></span> |
| <span id="cb24-2"><a href="reading-and-writing-data---single-files.html#cb24-2" tabindex="-1"></a>tf <span class="ot"><-</span> <span class="fu">tempfile</span>()</span> |
| <span id="cb24-3"><a href="reading-and-writing-data---single-files.html#cb24-3" tabindex="-1"></a><span class="fu">writeLines</span>(<span class="st">'</span></span> |
| <span id="cb24-4"><a href="reading-and-writing-data---single-files.html#cb24-4" tabindex="-1"></a><span class="st"> {"country": "United Kingdom", "code": "GB", "long": -3.44, "lat": 55.38}</span></span> |
| <span id="cb24-5"><a href="reading-and-writing-data---single-files.html#cb24-5" tabindex="-1"></a><span class="st"> {"country": "France", "code": "FR", "long": 2.21, "lat": 46.23}</span></span> |
| <span id="cb24-6"><a href="reading-and-writing-data---single-files.html#cb24-6" tabindex="-1"></a><span class="st"> {"country": "Germany", "code": "DE", "long": 10.45, "lat": 51.17}</span></span> |
| <span id="cb24-7"><a href="reading-and-writing-data---single-files.html#cb24-7" tabindex="-1"></a><span class="st"> '</span>, tf, <span class="at">useBytes =</span> <span class="cn">TRUE</span>)</span> |
| <span id="cb24-8"><a href="reading-and-writing-data---single-files.html#cb24-8" tabindex="-1"></a></span> |
| <span id="cb24-9"><a href="reading-and-writing-data---single-files.html#cb24-9" tabindex="-1"></a><span class="co"># Read in the data</span></span> |
| <span id="cb24-10"><a href="reading-and-writing-data---single-files.html#cb24-10" tabindex="-1"></a>countries <span class="ot"><-</span> <span class="fu">read_json_arrow</span>(tf, <span class="at">col_select =</span> <span class="fu">c</span>(<span class="st">"country"</span>, <span class="st">"long"</span>, <span class="st">"lat"</span>))</span> |
| <span id="cb24-11"><a href="reading-and-writing-data---single-files.html#cb24-11" tabindex="-1"></a>countries</span></code></pre></div> |
| <pre><code>## # A tibble: 3 × 3 |
| ## country long lat |
| ## &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; |
| ## 1 United Kingdom -3.44 55.4 |
| ## 2 France 2.21 46.2 |
| ## 3 Germany 10.4 51.2</code></pre> |
| </div> |
| </div> |
| <div id="write-a-compressed-single-data-file" class="section level2 hasAnchor" number="2.15"> |
| <h2><span class="header-section-number">2.15</span> Write a compressed single data file<a href="reading-and-writing-data---single-files.html#write-a-compressed-single-data-file" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to save a single file, compressed with a specified compression algorithm.</p> |
| <div id="solution-13" class="section level3 hasAnchor" number="2.15.1"> |
| <h3><span class="header-section-number">2.15.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-13" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="reading-and-writing-data---single-files.html#cb26-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span> |
| <span id="cb26-2"><a href="reading-and-writing-data---single-files.html#cb26-2" tabindex="-1"></a>td <span class="ot"><-</span> <span class="fu">tempfile</span>()</span> |
| <span id="cb26-3"><a href="reading-and-writing-data---single-files.html#cb26-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span> |
| <span id="cb26-4"><a href="reading-and-writing-data---single-files.html#cb26-4" tabindex="-1"></a></span> |
| <span id="cb26-5"><a href="reading-and-writing-data---single-files.html#cb26-5" tabindex="-1"></a><span class="co"># Write data compressed with the gzip algorithm instead of the default</span></span> |
| <span id="cb26-6"><a href="reading-and-writing-data---single-files.html#cb26-6" tabindex="-1"></a><span class="fu">write_parquet</span>(iris, <span class="fu">file.path</span>(td, <span class="st">"iris.parquet"</span>), <span class="at">compression =</span> <span class="st">"gzip"</span>)</span></code></pre></div> |
| </div> |
| <div id="see-also-1" class="section level3 hasAnchor" number="2.15.2"> |
| <h3><span class="header-section-number">2.15.2</span> See also<a href="reading-and-writing-data---single-files.html#see-also-1" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>Some formats write compressed data by default. For more information |
| on the supported compression algorithms and default settings, see:</p> |
| <ul> |
| <li><code>?write_parquet()</code></li> |
| <li><code>?write_feather()</code></li> |
| </ul> |
| </div> |
| </div> |
| <div id="read-compressed-data" class="section level2 hasAnchor" number="2.16"> |
| <h2><span class="header-section-number">2.16</span> Read compressed data<a href="reading-and-writing-data---single-files.html#read-compressed-data" class="anchor-section" aria-label="Anchor link to header"></a></h2> |
| <p>You want to read in a single data file which has been compressed.</p> |
| <div id="solution-14" class="section level3 hasAnchor" number="2.16.1"> |
| <h3><span class="header-section-number">2.16.1</span> Solution<a href="reading-and-writing-data---single-files.html#solution-14" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="reading-and-writing-data---single-files.html#cb27-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span> |
| <span id="cb27-2"><a href="reading-and-writing-data---single-files.html#cb27-2" tabindex="-1"></a>td <span class="ot"><-</span> <span class="fu">tempfile</span>()</span> |
| <span id="cb27-3"><a href="reading-and-writing-data---single-files.html#cb27-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span> |
| <span id="cb27-4"><a href="reading-and-writing-data---single-files.html#cb27-4" tabindex="-1"></a></span> |
| <span id="cb27-5"><a href="reading-and-writing-data---single-files.html#cb27-5" tabindex="-1"></a><span class="co"># Write data which is to be read back in</span></span> |
| <span id="cb27-6"><a href="reading-and-writing-data---single-files.html#cb27-6" tabindex="-1"></a><span class="fu">write_parquet</span>(iris, <span class="fu">file.path</span>(td, <span class="st">"iris.parquet"</span>), <span class="at">compression =</span> <span class="st">"gzip"</span>)</span> |
| <span id="cb27-7"><a href="reading-and-writing-data---single-files.html#cb27-7" tabindex="-1"></a></span> |
| <span id="cb27-8"><a href="reading-and-writing-data---single-files.html#cb27-8" tabindex="-1"></a><span class="co"># Read in data</span></span> |
| <span id="cb27-9"><a href="reading-and-writing-data---single-files.html#cb27-9" tabindex="-1"></a>ds <span class="ot"><-</span> <span class="fu">read_parquet</span>(<span class="fu">file.path</span>(td, <span class="st">"iris.parquet"</span>))</span> |
| <span id="cb27-10"><a href="reading-and-writing-data---single-files.html#cb27-10" tabindex="-1"></a>ds</span></code></pre></div> |
| <pre><code>## # A tibble: 150 × 5 |
| ## Sepal.Length Sepal.Width Petal.Length Petal.Width Species |
| ## &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;fct&gt; |
| ## 1 5.1 3.5 1.4 0.2 setosa |
| ## 2 4.9 3 1.4 0.2 setosa |
| ## 3 4.7 3.2 1.3 0.2 setosa |
| ## 4 4.6 3.1 1.5 0.2 setosa |
| ## 5 5 3.6 1.4 0.2 setosa |
| ## 6 5.4 3.9 1.7 0.4 setosa |
| ## 7 4.6 3.4 1.4 0.3 setosa |
| ## 8 5 3.4 1.5 0.2 setosa |
| ## 9 4.4 2.9 1.4 0.2 setosa |
| ## 10 4.9 3.1 1.5 0.1 setosa |
| ## # ℹ 140 more rows</code></pre> |
| </div> |
| <div id="discussion-3" class="section level3 hasAnchor" number="2.16.2"> |
| <h3><span class="header-section-number">2.16.2</span> Discussion<a href="reading-and-writing-data---single-files.html#discussion-3" class="anchor-section" aria-label="Anchor link to header"></a></h3> |
| <p>Note that Arrow automatically detects the compression, so you do not have to |
| specify it when calling the <code>read_*()</code> or <code>open_dataset()</code> functions.</p> |
| <p>Although the CSV format does not support compression itself, Arrow supports |
| reading in CSV data which has been compressed, if the file extension is <code>.gz</code>.</p> |
| <div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="reading-and-writing-data---single-files.html#cb29-1" tabindex="-1"></a><span class="co"># Create a temporary directory</span></span> |
| <span id="cb29-2"><a href="reading-and-writing-data---single-files.html#cb29-2" tabindex="-1"></a>td <span class="ot"><-</span> <span class="fu">tempfile</span>()</span> |
| <span id="cb29-3"><a href="reading-and-writing-data---single-files.html#cb29-3" tabindex="-1"></a><span class="fu">dir.create</span>(td)</span> |
| <span id="cb29-4"><a href="reading-and-writing-data---single-files.html#cb29-4" tabindex="-1"></a></span> |
| <span id="cb29-5"><a href="reading-and-writing-data---single-files.html#cb29-5" tabindex="-1"></a><span class="co"># Write data which is to be read back in</span></span> |
| <span id="cb29-6"><a href="reading-and-writing-data---single-files.html#cb29-6" tabindex="-1"></a><span class="fu">write.csv</span>(iris, <span class="fu">gzfile</span>(<span class="fu">file.path</span>(td, <span class="st">"iris.csv.gz"</span>)), <span class="at">row.names =</span> <span class="cn">FALSE</span>, <span class="at">quote =</span> <span class="cn">FALSE</span>)</span> |
| <span id="cb29-7"><a href="reading-and-writing-data---single-files.html#cb29-7" tabindex="-1"></a></span> |
| <span id="cb29-8"><a href="reading-and-writing-data---single-files.html#cb29-8" tabindex="-1"></a><span class="co"># Read in data</span></span> |
| <span id="cb29-9"><a href="reading-and-writing-data---single-files.html#cb29-9" tabindex="-1"></a>ds <span class="ot"><-</span> <span class="fu">read_csv_arrow</span>(<span class="fu">file.path</span>(td, <span class="st">"iris.csv.gz"</span>))</span> |
| <span id="cb29-10"><a href="reading-and-writing-data---single-files.html#cb29-10" tabindex="-1"></a>ds</span></code></pre></div> |
| <pre><code>## # A tibble: 150 × 5 |
| ## Sepal.Length Sepal.Width Petal.Length Petal.Width Species |
| ## &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;chr&gt; |
| ## 1 5.1 3.5 1.4 0.2 setosa |
| ## 2 4.9 3 1.4 0.2 setosa |
| ## 3 4.7 3.2 1.3 0.2 setosa |
| ## 4 4.6 3.1 1.5 0.2 setosa |
| ## 5 5 3.6 1.4 0.2 setosa |
| ## 6 5.4 3.9 1.7 0.4 setosa |
| ## 7 4.6 3.4 1.4 0.3 setosa |
| ## 8 5 3.4 1.5 0.2 setosa |
| ## 9 4.4 2.9 1.4 0.2 setosa |
| ## 10 4.9 3.1 1.5 0.1 setosa |
| ## # ℹ 140 more rows</code></pre> |
| |
| <!--- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| </div> |
| </div> |
| </div> |
| </section> |
| |
| </div> |
| </div> |
| </div> |
| <a href="index.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a> |
| <a href="reading-and-writing-data---multiple-files.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a> |
| </div> |
| </div> |
| <script src="libs/gitbook-2.6.7/js/app.min.js"></script> |
| <script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script> |
| <script src="libs/gitbook-2.6.7/js/plugin-search.js"></script> |
| <script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script> |
| <script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script> |
| <script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script> |
| <script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script> |
| <script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script> |
| <script> |
| gitbook.require(["gitbook"], function(gitbook) { |
| gitbook.start({ |
| "sharing": { |
| "github": false, |
| "facebook": true, |
| "twitter": true, |
| "linkedin": false, |
| "weibo": false, |
| "instapaper": false, |
| "vk": false, |
| "whatsapp": false, |
| "all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"] |
| }, |
| "fontsettings": { |
| "theme": "white", |
| "family": "sans", |
| "size": 2 |
| }, |
| "edit": { |
| "link": "https://github.com/apache/arrow-cookbook/edit/main/r/content/reading_and_writing_data.Rmd", |
| "text": "Edit" |
| }, |
| "history": { |
| "link": null, |
| "text": null |
| }, |
| "view": { |
| "link": null, |
| "text": null |
| }, |
| "download": null, |
| "search": { |
| "engine": "fuse", |
| "options": null |
| }, |
| "toc": { |
| "collapse": "subsection" |
| } |
| }); |
| }); |
| </script> |
| |
| </body> |
| |
| </html> |