<!-- blob: 42e75fd4813fe76661832fb8fd35d1021fdc7119 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Arrow File I/O &#8212; Apache Arrow v17.0.0.dev59</title>
<script data-cfasync="false">
// Copy the "mode" and "theme" values stored in localStorage onto the root
// element's data-mode / data-theme attributes. When nothing is stored,
// mode falls back to "" and theme falls back to "light".
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/sphinx_highlight.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script src="../../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'cpp/tutorials/io_tutorial';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/tutorials/io_tutorial.html" />
<link rel="icon" href="../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Arrow Compute" href="compute_tutorial.html" />
<link rel="prev" title="Basic Arrow Data Structures" href="basic_arrow.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
// Queue of tracker commands; reuses window._paq when it already exists.
// NOTE(review): presumably consumed by matomo.js once it has loaded -- confirm.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
// Queue the tracker endpoint and site id, then inject the matomo.js loader.
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
// Mark the new script element async and insert it before the first script element in the document.
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../index.html">
<img src="../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev59 - Home"/>
<script>document.write(`<img src="../../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev59 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../getting_started.html">Getting Started</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="../conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../user_guide.html">User Guide</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="../compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../streaming_execution.html">Acero: A C++ streaming execution engine</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../acero/overview.html">Acero Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/user_guide.html">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../io.html">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../orc.html">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="../env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/filesystem.html">Filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/dataset.html">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><a href="../getting_started.html" class="nav-link">Getting Started</a></li>
<li class="breadcrumb-item active" aria-current="page">Arrow File I/O</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="arrow-file-i-o">
<h1>Arrow File I/O<a class="headerlink" href="#arrow-file-i-o" title="Permalink to this heading">#</a></h1>
<p>Apache Arrow provides file I/O functions to facilitate use of Arrow from
the start to the end of an application. In this article, you will:</p>
<ol class="arabic simple">
<li><p>Read an Arrow file into a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> and write it back out afterwards</p></li>
<li><p>Read a CSV file into a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> and write it back out afterwards</p></li>
<li><p>Read a Parquet file into a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> and write it back out afterwards</p></li>
</ol>
<section id="pre-requisites">
<h2>Pre-requisites<a class="headerlink" href="#pre-requisites" title="Permalink to this heading">#</a></h2>
<p>Before continuing, make sure you have:</p>
<ol class="arabic simple">
<li><p>An Arrow installation, which you can set up here: <a class="reference internal" href="../build_system.html"><span class="doc">Using Arrow C++ in your own project</span></a></p></li>
<li><p>An understanding of basic Arrow data structures from <a class="reference internal" href="basic_arrow.html"><span class="doc">Basic Arrow Data Structures</span></a></p></li>
<li><p>A directory to run the final application in – this program will generate some files, so be prepared for that.</p></li>
</ol>
</section>
<section id="setup">
<h2>Setup<a class="headerlink" href="#setup" title="Permalink to this heading">#</a></h2>
<p>Before writing any file I/O code, we need to fill in a couple of gaps:</p>
<ol class="arabic simple">
<li><p>We need to include necessary headers.</p></li>
<li><p>A <code class="docutils literal notranslate"><span class="pre">main()</span></code> is needed to glue things together.</p></li>
<li><p>We need files to play with.</p></li>
</ol>
<section id="includes">
<h3>Includes<a class="headerlink" href="#includes" title="Permalink to this heading">#</a></h3>
<p>Before writing C++ code, we need some includes. We’ll get <code class="docutils literal notranslate"><span class="pre">iostream</span></code> for output, then include Arrow’s
I/O functionality for each file type we’ll work with in this article:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/api.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/csv/api.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/io/api.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/ipc/api.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;parquet/arrow/reader.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;parquet/arrow/writer.h&gt;</span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;iostream&gt;</span>
</pre></div>
</div>
</section>
<section id="main">
<h3>Main()<a class="headerlink" href="#main" title="Permalink to this heading">#</a></h3>
<p>For our glue, we’ll use the <code class="docutils literal notranslate"><span class="pre">main()</span></code> pattern from the previous tutorial on
data structures:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">st</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">RunMain</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">st</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cerr</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">st</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>As in the previous tutorial, this is paired with a <code class="docutils literal notranslate"><span class="pre">RunMain()</span></code>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">RunMain</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
</pre></div>
</div>
</section>
<section id="generating-files-for-reading">
<h3>Generating Files for Reading<a class="headerlink" href="#generating-files-for-reading" title="Permalink to this heading">#</a></h3>
<p>We need some files to actually play with. In practice, you’ll likely
have some input for your own application. Here, however, we want to
explore doing I/O for the sake of it, so let’s generate some files to make
this easy to follow. To create those, we’ll define a helper function
that we’ll run first. Feel free to read through it now; the concepts it
uses are explained later in this article. Note that we’re using the
day/month/year data from the previous tutorial. For now, just copy the
function in:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">GenInitialFile</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Make a couple 8-bit integer arrays and a 16-bit integer array -- just like</span>
<span class="w"> </span><span class="c1">// basic Arrow example.</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Int8Builder</span><span class="w"> </span><span class="n">int8builder</span><span class="p">;</span>
<span class="w"> </span><span class="kt">int8_t</span><span class="w"> </span><span class="n">days_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">17</span><span class="p">,</span><span class="w"> </span><span class="mi">23</span><span class="p">,</span><span class="w"> </span><span class="mi">28</span><span class="p">};</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int8builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">days_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">days</span><span class="p">;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">days</span><span class="p">,</span><span class="w"> </span><span class="n">int8builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="w"> </span><span class="kt">int8_t</span><span class="w"> </span><span class="n">months_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">};</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int8builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">months_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">months</span><span class="p">;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">months</span><span class="p">,</span><span class="w"> </span><span class="n">int8builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Int16Builder</span><span class="w"> </span><span class="n">int16builder</span><span class="p">;</span>
<span class="w"> </span><span class="kt">int16_t</span><span class="w"> </span><span class="n">years_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1990</span><span class="p">,</span><span class="w"> </span><span class="mi">2000</span><span class="p">,</span><span class="w"> </span><span class="mi">1995</span><span class="p">,</span><span class="w"> </span><span class="mi">2000</span><span class="p">,</span><span class="w"> </span><span class="mi">1995</span><span class="p">};</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int16builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">years_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">years</span><span class="p">;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">years</span><span class="p">,</span><span class="w"> </span><span class="n">int16builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="w"> </span><span class="c1">// Get a vector of our Arrays</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">columns</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">days</span><span class="p">,</span><span class="w"> </span><span class="n">months</span><span class="p">,</span><span class="w"> </span><span class="n">years</span><span class="p">};</span>
<span class="w"> </span><span class="c1">// Make a schema to initialize the Table with</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">field_day</span><span class="p">,</span><span class="w"> </span><span class="n">field_month</span><span class="p">,</span><span class="w"> </span><span class="n">field_year</span><span class="p">;</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">;</span>
<span class="w"> </span><span class="n">field_day</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Day&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">());</span>
<span class="w"> </span><span class="n">field_month</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Month&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">());</span>
<span class="w"> </span><span class="n">field_year</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Year&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int16</span><span class="p">());</span>
<span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">field_day</span><span class="p">,</span><span class="w"> </span><span class="n">field_month</span><span class="p">,</span><span class="w"> </span><span class="n">field_year</span><span class="p">});</span>
<span class="w"> </span><span class="c1">// With the schema and data, create a Table</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">table</span><span class="p">;</span>
<span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// Write out test files in IPC, CSV, and Parquet for the example to use.</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">outfile</span><span class="p">;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.arrow&quot;</span><span class="p">));</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ipc_writer</span><span class="p">,</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">));</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">));</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.csv&quot;</span><span class="p">));</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">csv_writer</span><span class="p">,</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">MakeCSVWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">));</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.parquet&quot;</span><span class="p">));</span>
<span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span>
<span class="w"> </span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<p>To get the files for the rest of your code to function, make sure to
call <code class="docutils literal notranslate"><span class="pre">GenInitialFile()</span></code> as the very first line in <code class="docutils literal notranslate"><span class="pre">RunMain()</span></code> to initialize
the environment:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Generate initial files for each format with a helper function -- don&#39;t worry,</span>
<span class="w"> </span><span class="c1">// we&#39;ll also write a table in this example.</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">GenInitialFile</span><span class="p">());</span>
</pre></div>
</div>
</section>
</section>
<section id="i-o-with-arrow-files">
<h2>I/O with Arrow Files<a class="headerlink" href="#i-o-with-arrow-files" title="Permalink to this heading">#</a></h2>
<p>We’re going to go through this step by step, reading then writing, as
follows:</p>
<ol class="arabic simple">
<li><p>Reading a file</p>
<ol class="loweralpha simple">
<li><p>Open the file</p></li>
<li><p>Bind file to <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc21RecordBatchFileReaderE" title="arrow::ipc::RecordBatchFileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ipc::RecordBatchFileReader</span></code></a></p></li>
<li><p>Read file to <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a></p></li>
</ol>
</li>
<li><p>Writing a file</p>
<ol class="loweralpha simple">
<li><p>Get an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io16FileOutputStreamE" title="arrow::io::FileOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::FileOutputStream</span></code></a></p></li>
<li><p>Write to file from <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a></p></li>
</ol>
</li>
</ol>
<section id="opening-a-file">
<h3>Opening a File<a class="headerlink" href="#opening-a-file" title="Permalink to this heading">#</a></h3>
<p>To actually read a file, we need to get some sort of way to point to it.
In Arrow, that means we’re going to get an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a> object – much
like an <a class="reference internal" href="../api/builder.html#_CPPv4N5arrow12ArrayBuilderE" title="arrow::ArrayBuilder"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ArrayBuilder</span></code></a> can clear and make new arrays, we can reassign this
to new files, so we’ll use this instance throughout the examples:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// First, we have to set up a ReadableFile object, which just lets us point our</span>
<span class="w"> </span><span class="c1">// readers to the right data on disk. We&#39;ll be reusing this object, and rebinding</span>
<span class="w"> </span><span class="c1">// it to multiple files throughout the example.</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">&gt;</span><span class="w"> </span><span class="n">infile</span><span class="p">;</span>
</pre></div>
</div>
<p>An <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a> does little alone – we actually have it bind to a file
with <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFile4OpenERKNSt6stringEP10MemoryPool" title="arrow::io::ReadableFile::Open"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">io::ReadableFile::Open()</span></code></a>. For
our purposes here, the default arguments suffice:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Get &quot;test_in.arrow&quot; into our file pointer</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;test_in.arrow&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">()));</span>
</pre></div>
</div>
</section>
<section id="opening-an-arrow-file-reader">
<h3>Opening an Arrow file Reader<a class="headerlink" href="#opening-an-arrow-file-reader" title="Permalink to this heading">#</a></h3>
<p>An <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a> is too generic to offer all functionality to read an Arrow file.
We need to use it to get an <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc21RecordBatchFileReaderE" title="arrow::ipc::RecordBatchFileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ipc::RecordBatchFileReader</span></code></a> object. This object implements
all the logic needed to read an Arrow file with correct formatting. We get one through
<a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc21RecordBatchFileReader4OpenEPN2io16RandomAccessFileERK14IpcReadOptions" title="arrow::ipc::RecordBatchFileReader::Open"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ipc::RecordBatchFileReader::Open()</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Open up the file with the IPC features of the library, gives us a reader object.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">ipc_reader</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchFileReader</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="n">infile</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="reading-an-open-arrow-file-to-recordbatch">
<h3>Reading an Open Arrow File to RecordBatch<a class="headerlink" href="#reading-an-open-arrow-file-to-recordbatch" title="Permalink to this heading">#</a></h3>
<p>An Arrow file is read as one or more <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatches</span></code></a>, so we’ll first declare a
<a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> to read into. Once we have that, we can actually read the file. Arrow
files can have multiple <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatches</span></code></a>, so we must pass an index. This
file only has one, so pass 0:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Using the reader, we can read Record Batches. Note that this is specific to IPC;</span>
<span class="w"> </span><span class="c1">// for other formats, we focus on Tables, but here, RecordBatches are used.</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">rbatch</span><span class="p">;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">rbatch</span><span class="p">,</span><span class="w"> </span><span class="n">ipc_reader</span><span class="o">-&gt;</span><span class="n">ReadRecordBatch</span><span class="p">(</span><span class="mi">0</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="prepare-a-fileoutputstream">
<h3>Prepare a FileOutputStream<a class="headerlink" href="#prepare-a-fileoutputstream" title="Permalink to this heading">#</a></h3>
<p>For output, we need an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io16FileOutputStreamE" title="arrow::io::FileOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::FileOutputStream</span></code></a>. Just like our <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a>,
we’ll be reusing this, so be ready for that. We open files the same way
as when reading:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Just like with input, we get an object for the output file.</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">outfile</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Bind it to &quot;test_out.arrow&quot;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.arrow&quot;</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="write-arrow-file-from-recordbatch">
<h3>Write Arrow File from RecordBatch<a class="headerlink" href="#write-arrow-file-from-recordbatch" title="Permalink to this heading">#</a></h3>
<p>Now, we grab the <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> we read into previously, and use it, along
with our target file, to create an <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriterE" title="arrow::ipc::RecordBatchWriter"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter</span></code></a>. The
<a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriterE" title="arrow::ipc::RecordBatchWriter"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter</span></code></a> needs two things:</p>
<ol class="arabic simple">
<li><p>the target file</p></li>
<li><p>the <a class="reference internal" href="../api/datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Schema</span></code></a> for our <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> (in case we need to write more <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatches</span></code></a> of the same format.)</p></li>
</ol>
<p>The <a class="reference internal" href="../api/datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Schema</span></code></a> comes from our existing <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> and the target file is
the output stream we just created.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Set up a writer with the output file -- and the schema! We&#39;re defining everything</span>
<span class="w"> </span><span class="c1">// here, loading to fire.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ipc_writer</span><span class="p">,</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">rbatch</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
</pre></div>
</div>
<p>We can just call <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriter16WriteRecordBatchERK11RecordBatch" title="arrow::ipc::RecordBatchWriter::WriteRecordBatch"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter::WriteRecordBatch()</span></code></a> with our <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> to fill up our
file:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Write the record batch.</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">WriteRecordBatch</span><span class="p">(</span><span class="o">*</span><span class="n">rbatch</span><span class="p">));</span>
</pre></div>
</div>
<p>For IPC in particular, the writer has to be closed since it anticipates more than one batch may be written. To do that:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Specifically for IPC, the writer needs to be explicitly closed.</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
</pre></div>
</div>
<p>Now we’ve read and written an IPC file!</p>
</section>
</section>
<section id="i-o-with-csv">
<h2>I/O with CSV<a class="headerlink" href="#i-o-with-csv" title="Permalink to this heading">#</a></h2>
<p>We’re going to go through this step by step, reading then writing, as
follows:</p>
<ol class="arabic simple">
<li><p>Reading a file</p>
<ol class="loweralpha simple">
<li><p>Open the file</p></li>
<li><p>Prepare Table</p></li>
<li><p>Read File using <a class="reference internal" href="../api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">csv::TableReader</span></code></a></p></li>
</ol>
</li>
<li><p>Writing a file</p>
<ol class="loweralpha simple">
<li><p>Get an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io16FileOutputStreamE" title="arrow::io::FileOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::FileOutputStream</span></code></a></p></li>
<li><p>Write to file from <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a></p></li>
</ol>
</li>
</ol>
<section id="opening-a-csv-file">
<h3>Opening a CSV File<a class="headerlink" href="#opening-a-csv-file" title="Permalink to this heading">#</a></h3>
<p>For a CSV file, we need to open an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a>, just like an Arrow file,
and reuse our <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a> object from before to do so:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Bind our input file to &quot;test_in.csv&quot;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.csv&quot;</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="preparing-a-table">
<h3>Preparing a Table<a class="headerlink" href="#preparing-a-table" title="Permalink to this heading">#</a></h3>
<p>CSV can be read into a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>, so declare a pointer to a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">csv_table</span><span class="p">;</span>
</pre></div>
</div>
</section>
<section id="read-a-csv-file-to-table">
<h3>Read a CSV File to Table<a class="headerlink" href="#read-a-csv-file-to-table" title="Permalink to this heading">#</a></h3>
<p>The CSV reader has option structs which need to be passed – luckily,
there are defaults for these which we can pass directly. For reference
on the other options, go here: <a class="reference internal" href="../api/formats.html"><span class="doc">File Formats</span></a>.
Our CSV file has no special delimiters and is small, so we can make our reader
with defaults:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// The CSV reader has several objects for various options. For now, we&#39;ll use defaults.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">csv_reader</span><span class="p">,</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">TableReader</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">default_io_context</span><span class="p">(),</span><span class="w"> </span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ReadOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">(),</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ParseOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">(),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ConvertOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">()));</span>
</pre></div>
</div>
<p>With the CSV reader primed, we can use its <a class="reference internal" href="../api/formats.html#_CPPv4N5arrow3csv11TableReader4ReadEv" title="arrow::csv::TableReader::Read"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">csv::TableReader::Read()</span></code></a> method to fill our
<a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Read the table.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">csv_table</span><span class="p">,</span><span class="w"> </span><span class="n">csv_reader</span><span class="o">-&gt;</span><span class="n">Read</span><span class="p">())</span>
</pre></div>
</div>
</section>
<section id="write-a-csv-file-from-table">
<h3>Write a CSV File from Table<a class="headerlink" href="#write-a-csv-file-from-table" title="Permalink to this heading">#</a></h3>
<p>Writing a CSV file from a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> looks exactly like writing an IPC file from a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a>,
except with our <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>, and using <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriter10WriteTableERK5Table" title="arrow::ipc::RecordBatchWriter::WriteTable"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter::WriteTable()</span></code></a> instead of
<a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriter16WriteRecordBatchERK11RecordBatch" title="arrow::ipc::RecordBatchWriter::WriteRecordBatch"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter::WriteRecordBatch()</span></code></a>. Note that the same writer class is used –
we’re writing with <a class="reference internal" href="../api/ipc.html#_CPPv4N5arrow3ipc17RecordBatchWriter10WriteTableERK5Table" title="arrow::ipc::RecordBatchWriter::WriteTable"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ipc::RecordBatchWriter::WriteTable()</span></code></a> because we have a <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>. We’ll target
a file, use our <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table’s</span></code></a> <a class="reference internal" href="../api/datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Schema</span></code></a>, and then write the <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Bind our output file to &quot;test_out.csv&quot;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.csv&quot;</span><span class="p">));</span>
<span class="w"> </span><span class="c1">// The CSV writer has simpler defaults, review API documentation for more complex usage.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">csv_writer</span><span class="p">,</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">MakeCSVWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">csv_table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">csv_table</span><span class="p">));</span>
<span class="w"> </span><span class="c1">// Not necessary, but a safe practice.</span>
<span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
</pre></div>
</div>
<p>Now, we’ve read and written a CSV file!</p>
</section>
</section>
<section id="file-i-o-with-parquet">
<h2>File I/O with Parquet<a class="headerlink" href="#file-i-o-with-parquet" title="Permalink to this heading">#</a></h2>
<p>We’re going to go through this step by step, reading then writing, as
follows:</p>
<ol class="arabic simple">
<li><p>Reading a file</p>
<ol class="loweralpha simple">
<li><p>Open the file</p></li>
<li><p>Prepare <a class="reference internal" href="../api/formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">parquet::arrow::FileReader</span></code></a></p></li>
<li><p>Read file to <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a></p></li>
</ol>
</li>
<li><p>Writing a file</p>
<ol class="loweralpha simple">
<li><p>Write <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> to file</p></li>
</ol>
</li>
</ol>
<section id="opening-a-parquet-file">
<h3>Opening a Parquet File<a class="headerlink" href="#opening-a-parquet-file" title="Permalink to this heading">#</a></h3>
<p>Once more, this file format, Parquet, needs an <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">io::ReadableFile</span></code></a>, which we
already have, and for the <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFile4OpenERKNSt6stringEP10MemoryPool" title="arrow::io::ReadableFile::Open"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">io::ReadableFile::Open()</span></code></a> method to be called on a file:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Bind our input file to &quot;test_in.parquet&quot;</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.parquet&quot;</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="setting-up-a-parquet-reader">
<h3>Setting up a Parquet Reader<a class="headerlink" href="#setting-up-a-parquet-reader" title="Permalink to this heading">#</a></h3>
<p>As always, we need a Reader to actually read the file. We’ve been
getting Readers for each file format from the Arrow namespace. This
time, we enter the Parquet namespace to get the <a class="reference internal" href="../api/formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">parquet::arrow::FileReader</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">FileReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="p">;</span>
</pre></div>
</div>
<p>Now, to set up our reader, we call <code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">parquet::arrow::OpenFile()</span></code>. Yes, this is necessary
even though we used <a class="reference internal" href="../api/io.html#_CPPv4N5arrow2io12ReadableFile4OpenERKNSt6stringEP10MemoryPool" title="arrow::io::ReadableFile::Open"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">io::ReadableFile::Open()</span></code></a>. Note that we pass our
<a class="reference internal" href="../api/formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">parquet::arrow::FileReader</span></code></a> by reference as an output argument, instead of assigning it from a return value:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Note that Parquet&#39;s OpenFile() takes the reader by reference, rather than returning</span>
<span class="w"> </span><span class="c1">// a reader.</span>
<span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span>
<span class="w"> </span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">OpenFile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="o">&amp;</span><span class="n">reader</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="reading-a-parquet-file-to-table">
<h3>Reading a Parquet File to Table<a class="headerlink" href="#reading-a-parquet-file-to-table" title="Permalink to this heading">#</a></h3>
<p>With a prepared <a class="reference internal" href="../api/formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">parquet::arrow::FileReader</span></code></a> in hand, we can read to a
<a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a>, except we must pass the <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> by reference instead of outputting to it:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">parquet_table</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Read the table.</span>
<span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span><span class="n">reader</span><span class="o">-&gt;</span><span class="n">ReadTable</span><span class="p">(</span><span class="o">&amp;</span><span class="n">parquet_table</span><span class="p">));</span>
</pre></div>
</div>
</section>
<section id="writing-a-parquet-file-from-table">
<h3>Writing a Parquet File from Table<a class="headerlink" href="#writing-a-parquet-file-from-table" title="Permalink to this heading">#</a></h3>
<p>For single-shot writes, writing a Parquet file does not need a writer object. Instead, we give
it our table, point to the memory pool it will use for any necessary
memory consumption, tell it where to write, and the chunk size if it
needs to break up the file at all:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="c1">// Parquet writing does not need a declared writer object. Just get the output</span>
<span class="w"> </span><span class="c1">// file bound, then pass in the table, memory pool, output, and chunk size for</span>
<span class="w"> </span><span class="c1">// breaking up the Table on-disk.</span>
<span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.parquet&quot;</span><span class="p">));</span>
<span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="w"> </span><span class="o">*</span><span class="n">parquet_table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
</pre></div>
</div>
</section>
</section>
<section id="ending-program">
<h2>Ending Program<a class="headerlink" href="#ending-program" title="Permalink to this heading">#</a></h2>
<p>At the end, we just return <a class="reference internal" href="../api/support.html#_CPPv4N5arrow6Status2OKEv" title="arrow::Status::OK"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">Status::OK()</span></code></a>, so that <code class="docutils literal notranslate"><span class="pre">main()</span></code> knows that
we’re done, and that everything’s okay. Just like in the first tutorial.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<p>With that, you’ve read and written IPC, CSV, and Parquet in Arrow, and
can properly load data and write output! Now, we can move into
processing data with compute functions in the next article.</p>
<p>Refer to the below for a copy of the complete code:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 19</span><span class="c1">// (Doc section: Includes)</span>
<span class="linenos"> 20</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/api.h&gt;</span>
<span class="linenos"> 21</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/csv/api.h&gt;</span>
<span class="linenos"> 22</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/io/api.h&gt;</span>
<span class="linenos"> 23</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/ipc/api.h&gt;</span>
<span class="linenos"> 24</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;parquet/arrow/reader.h&gt;</span>
<span class="linenos"> 25</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;parquet/arrow/writer.h&gt;</span>
<span class="linenos"> 26</span>
<span class="linenos"> 27</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;iostream&gt;</span>
<span class="linenos"> 28</span><span class="c1">// (Doc section: Includes)</span>
<span class="linenos"> 29</span>
<span class="linenos"> 30</span><span class="c1">// (Doc section: GenInitialFile)</span>
<span class="linenos"> 31</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">GenInitialFile</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 32</span><span class="w"> </span><span class="c1">// Make a couple 8-bit integer arrays and a 16-bit integer array -- just like</span>
<span class="linenos"> 33</span><span class="w"> </span><span class="c1">// basic Arrow example.</span>
<span class="linenos"> 34</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Int8Builder</span><span class="w"> </span><span class="n">int8builder</span><span class="p">;</span>
<span class="linenos"> 35</span><span class="w"> </span><span class="kt">int8_t</span><span class="w"> </span><span class="n">days_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">17</span><span class="p">,</span><span class="w"> </span><span class="mi">23</span><span class="p">,</span><span class="w"> </span><span class="mi">28</span><span class="p">};</span>
<span class="linenos"> 36</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int8builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">days_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="linenos"> 37</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">days</span><span class="p">;</span>
<span class="linenos"> 38</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">days</span><span class="p">,</span><span class="w"> </span><span class="n">int8builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="linenos"> 39</span>
<span class="linenos"> 40</span><span class="w"> </span><span class="kt">int8_t</span><span class="w"> </span><span class="n">months_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">};</span>
<span class="linenos"> 41</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int8builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">months_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="linenos"> 42</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">months</span><span class="p">;</span>
<span class="linenos"> 43</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">months</span><span class="p">,</span><span class="w"> </span><span class="n">int8builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="linenos"> 44</span>
<span class="linenos"> 45</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Int16Builder</span><span class="w"> </span><span class="n">int16builder</span><span class="p">;</span>
<span class="linenos"> 46</span><span class="w"> </span><span class="kt">int16_t</span><span class="w"> </span><span class="n">years_raw</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1990</span><span class="p">,</span><span class="w"> </span><span class="mi">2000</span><span class="p">,</span><span class="w"> </span><span class="mi">1995</span><span class="p">,</span><span class="w"> </span><span class="mi">2000</span><span class="p">,</span><span class="w"> </span><span class="mi">1995</span><span class="p">};</span>
<span class="linenos"> 47</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int16builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">years_raw</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="linenos"> 48</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">years</span><span class="p">;</span>
<span class="linenos"> 49</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">years</span><span class="p">,</span><span class="w"> </span><span class="n">int16builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">());</span>
<span class="linenos"> 50</span>
<span class="linenos"> 51</span><span class="w"> </span><span class="c1">// Get a vector of our Arrays</span>
<span class="linenos"> 52</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">columns</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">days</span><span class="p">,</span><span class="w"> </span><span class="n">months</span><span class="p">,</span><span class="w"> </span><span class="n">years</span><span class="p">};</span>
<span class="linenos"> 53</span>
<span class="linenos"> 54</span><span class="w"> </span><span class="c1">// Make a schema to initialize the Table with</span>
<span class="linenos"> 55</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">field_day</span><span class="p">,</span><span class="w"> </span><span class="n">field_month</span><span class="p">,</span><span class="w"> </span><span class="n">field_year</span><span class="p">;</span>
<span class="linenos"> 56</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">;</span>
<span class="linenos"> 57</span>
<span class="linenos"> 58</span><span class="w"> </span><span class="n">field_day</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Day&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">());</span>
<span class="linenos"> 59</span><span class="w"> </span><span class="n">field_month</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Month&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">());</span>
<span class="linenos"> 60</span><span class="w"> </span><span class="n">field_year</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;Year&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int16</span><span class="p">());</span>
<span class="linenos"> 61</span>
<span class="linenos"> 62</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">field_day</span><span class="p">,</span><span class="w"> </span><span class="n">field_month</span><span class="p">,</span><span class="w"> </span><span class="n">field_year</span><span class="p">});</span>
<span class="linenos"> 63</span><span class="w"> </span><span class="c1">// With the schema and data, create a Table</span>
<span class="linenos"> 64</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">table</span><span class="p">;</span>
<span class="linenos"> 65</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">);</span>
<span class="linenos"> 66</span>
<span class="linenos"> 67</span><span class="w"> </span><span class="c1">// Write out test files in IPC, CSV, and Parquet for the example to use.</span>
<span class="linenos"> 68</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">outfile</span><span class="p">;</span>
<span class="linenos"> 69</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.arrow&quot;</span><span class="p">));</span>
<span class="linenos"> 70</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ipc_writer</span><span class="p">,</span>
<span class="linenos"> 71</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">));</span>
<span class="linenos"> 72</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">));</span>
<span class="linenos"> 73</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="linenos"> 74</span>
<span class="linenos"> 75</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.csv&quot;</span><span class="p">));</span>
<span class="linenos"> 76</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">csv_writer</span><span class="p">,</span>
<span class="linenos"> 77</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">MakeCSVWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
<span class="linenos"> 78</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">));</span>
<span class="linenos"> 79</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="linenos"> 80</span>
<span class="linenos"> 81</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.parquet&quot;</span><span class="p">));</span>
<span class="linenos"> 82</span><span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span>
<span class="linenos"> 83</span><span class="w"> </span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="linenos"> 84</span>
<span class="linenos"> 85</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos"> 86</span><span class="p">}</span>
<span class="linenos"> 87</span><span class="c1">// (Doc section: GenInitialFile)</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span><span class="c1">// (Doc section: RunMain)</span>
<span class="linenos"> 90</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">RunMain</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 91</span><span class="w"> </span><span class="c1">// (Doc section: RunMain)</span>
<span class="linenos"> 92</span><span class="w"> </span><span class="c1">// (Doc section: Gen Files)</span>
<span class="linenos"> 93</span><span class="w"> </span><span class="c1">// Generate initial files for each format with a helper function -- don&#39;t worry,</span>
<span class="linenos"> 94</span><span class="w"> </span><span class="c1">// we&#39;ll also write a table in this example.</span>
<span class="linenos"> 95</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">GenInitialFile</span><span class="p">());</span>
<span class="linenos"> 96</span><span class="w"> </span><span class="c1">// (Doc section: Gen Files)</span>
<span class="linenos"> 97</span>
<span class="linenos"> 98</span><span class="w"> </span><span class="c1">// (Doc section: ReadableFile Definition)</span>
<span class="linenos"> 99</span><span class="w"> </span><span class="c1">// First, we have to set up a ReadableFile object, which just lets us point our</span>
<span class="linenos">100</span><span class="w"> </span><span class="c1">// readers to the right data on disk. We&#39;ll be reusing this object, and rebinding</span>
<span class="linenos">101</span><span class="w"> </span><span class="c1">// it to multiple files throughout the example.</span>
<span class="linenos">102</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">&gt;</span><span class="w"> </span><span class="n">infile</span><span class="p">;</span>
<span class="linenos">103</span><span class="w"> </span><span class="c1">// (Doc section: ReadableFile Definition)</span>
<span class="linenos">104</span><span class="w"> </span><span class="c1">// (Doc section: Arrow ReadableFile Open)</span>
<span class="linenos">105</span><span class="w"> </span><span class="c1">// Get &quot;test_in.arrow&quot; into our file pointer</span>
<span class="linenos">106</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span>
<span class="linenos">107</span><span class="w"> </span><span class="s">&quot;test_in.arrow&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">()));</span>
<span class="linenos">108</span><span class="w"> </span><span class="c1">// (Doc section: Arrow ReadableFile Open)</span>
<span class="linenos">109</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Read Open)</span>
<span class="linenos">110</span><span class="w"> </span><span class="c1">// Open up the file with the IPC features of the library, which gives us a reader object.</span>
<span class="linenos">111</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">ipc_reader</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchFileReader</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="n">infile</span><span class="p">));</span>
<span class="linenos">112</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Read Open)</span>
<span class="linenos">113</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Read)</span>
<span class="linenos">114</span><span class="w"> </span><span class="c1">// Using the reader, we can read Record Batches. Note that this is specific to IPC;</span>
<span class="linenos">115</span><span class="w"> </span><span class="c1">// for other formats, we focus on Tables, but here, RecordBatches are used.</span>
<span class="linenos">116</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">rbatch</span><span class="p">;</span>
<span class="linenos">117</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">rbatch</span><span class="p">,</span><span class="w"> </span><span class="n">ipc_reader</span><span class="o">-&gt;</span><span class="n">ReadRecordBatch</span><span class="p">(</span><span class="mi">0</span><span class="p">));</span>
<span class="linenos">118</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Read)</span>
<span class="linenos">119</span>
<span class="linenos">120</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Write Open)</span>
<span class="linenos">121</span><span class="w"> </span><span class="c1">// Just like with input, we get an object for the output file.</span>
<span class="linenos">122</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">outfile</span><span class="p">;</span>
<span class="linenos">123</span><span class="w"> </span><span class="c1">// Bind it to &quot;test_out.arrow&quot;</span>
<span class="linenos">124</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.arrow&quot;</span><span class="p">));</span>
<span class="linenos">125</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Write Open)</span>
<span class="linenos">126</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Writer)</span>
<span class="linenos">127</span><span class="w"> </span><span class="c1">// Set up a writer with the output file -- and the schema! We&#39;re defining everything</span>
<span class="linenos">128</span><span class="w"> </span><span class="c1">// here, so the writer is ready to go.</span>
<span class="linenos">129</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ipc_writer</span><span class="p">,</span>
<span class="linenos">130</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">rbatch</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
<span class="linenos">131</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Writer)</span>
<span class="linenos">132</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Write)</span>
<span class="linenos">133</span><span class="w"> </span><span class="c1">// Write the record batch.</span>
<span class="linenos">134</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">WriteRecordBatch</span><span class="p">(</span><span class="o">*</span><span class="n">rbatch</span><span class="p">));</span>
<span class="linenos">135</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Write)</span>
<span class="linenos">136</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Close)</span>
<span class="linenos">137</span><span class="w"> </span><span class="c1">// Specifically for IPC, the writer needs to be explicitly closed.</span>
<span class="linenos">138</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ipc_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="linenos">139</span><span class="w"> </span><span class="c1">// (Doc section: Arrow Close)</span>
<span class="linenos">140</span>
<span class="linenos">141</span><span class="w"> </span><span class="c1">// (Doc section: CSV Read Open)</span>
<span class="linenos">142</span><span class="w"> </span><span class="c1">// Bind our input file to &quot;test_in.csv&quot;</span>
<span class="linenos">143</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.csv&quot;</span><span class="p">));</span>
<span class="linenos">144</span><span class="w"> </span><span class="c1">// (Doc section: CSV Read Open)</span>
<span class="linenos">145</span><span class="w"> </span><span class="c1">// (Doc section: CSV Table Declare)</span>
<span class="linenos">146</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">csv_table</span><span class="p">;</span>
<span class="linenos">147</span><span class="w"> </span><span class="c1">// (Doc section: CSV Table Declare)</span>
<span class="linenos">148</span><span class="w"> </span><span class="c1">// (Doc section: CSV Reader Make)</span>
<span class="linenos">149</span><span class="w"> </span><span class="c1">// The CSV reader has several objects for various options. For now, we&#39;ll use defaults.</span>
<span class="linenos">150</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span>
<span class="linenos">151</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">csv_reader</span><span class="p">,</span>
<span class="linenos">152</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">TableReader</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span>
<span class="linenos">153</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">default_io_context</span><span class="p">(),</span><span class="w"> </span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ReadOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">(),</span>
<span class="linenos">154</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ParseOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">(),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ConvertOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">()));</span>
<span class="linenos">155</span><span class="w"> </span><span class="c1">// (Doc section: CSV Reader Make)</span>
<span class="linenos">156</span><span class="w"> </span><span class="c1">// (Doc section: CSV Read)</span>
<span class="linenos">157</span><span class="w"> </span><span class="c1">// Read the table.</span>
<span class="linenos">158</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">csv_table</span><span class="p">,</span><span class="w"> </span><span class="n">csv_reader</span><span class="o">-&gt;</span><span class="n">Read</span><span class="p">())</span>
<span class="linenos">159</span><span class="w"> </span><span class="c1">// (Doc section: CSV Read)</span>
<span class="linenos">160</span>
<span class="linenos">161</span><span class="w"> </span><span class="c1">// (Doc section: CSV Write)</span>
<span class="linenos">162</span><span class="w"> </span><span class="c1">// Bind our output file to &quot;test_out.csv&quot;</span>
<span class="linenos">163</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.csv&quot;</span><span class="p">));</span>
<span class="linenos">164</span><span class="w"> </span><span class="c1">// The CSV writer has simpler defaults; review the API documentation for more complex usage.</span>
<span class="linenos">165</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">csv_writer</span><span class="p">,</span>
<span class="linenos">166</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">MakeCSVWriter</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">csv_table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()));</span>
<span class="linenos">167</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">csv_table</span><span class="p">));</span>
<span class="linenos">168</span><span class="w"> </span><span class="c1">// Not necessary, but a safe practice.</span>
<span class="linenos">169</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">csv_writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="linenos">170</span><span class="w"> </span><span class="c1">// (Doc section: CSV Write)</span>
<span class="linenos">171</span>
<span class="linenos">172</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Read Open)</span>
<span class="linenos">173</span><span class="w"> </span><span class="c1">// Bind our input file to &quot;test_in.parquet&quot;</span>
<span class="linenos">174</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">ReadableFile</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_in.parquet&quot;</span><span class="p">));</span>
<span class="linenos">175</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Read Open)</span>
<span class="linenos">176</span><span class="w"> </span><span class="c1">// (Doc section: Parquet FileReader)</span>
<span class="linenos">177</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">FileReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="p">;</span>
<span class="linenos">178</span><span class="w"> </span><span class="c1">// (Doc section: Parquet FileReader)</span>
<span class="linenos">179</span><span class="w"> </span><span class="c1">// (Doc section: Parquet OpenFile)</span>
<span class="linenos">180</span><span class="w"> </span><span class="c1">// Note that Parquet&#39;s OpenFile() fills in the reader through an output pointer, rather than returning</span>
<span class="linenos">181</span><span class="w"> </span><span class="c1">// a reader.</span>
<span class="linenos">182</span><span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span>
<span class="linenos">183</span><span class="w"> </span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">OpenFile</span><span class="p">(</span><span class="n">infile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="o">&amp;</span><span class="n">reader</span><span class="p">));</span>
<span class="linenos">184</span><span class="w"> </span><span class="c1">// (Doc section: Parquet OpenFile)</span>
<span class="linenos">185</span>
<span class="linenos">186</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Read)</span>
<span class="linenos">187</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">parquet_table</span><span class="p">;</span>
<span class="linenos">188</span><span class="w"> </span><span class="c1">// Read the table.</span>
<span class="linenos">189</span><span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span><span class="n">reader</span><span class="o">-&gt;</span><span class="n">ReadTable</span><span class="p">(</span><span class="o">&amp;</span><span class="n">parquet_table</span><span class="p">));</span>
<span class="linenos">190</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Read)</span>
<span class="linenos">191</span>
<span class="linenos">192</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Write)</span>
<span class="linenos">193</span><span class="w"> </span><span class="c1">// Parquet writing does not need a declared writer object. Just get the output</span>
<span class="linenos">194</span><span class="w"> </span><span class="c1">// file bound, then pass in the table, memory pool, output, and chunk size for</span>
<span class="linenos">195</span><span class="w"> </span><span class="c1">// breaking up the Table on-disk.</span>
<span class="linenos">196</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">FileOutputStream</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="s">&quot;test_out.parquet&quot;</span><span class="p">));</span>
<span class="linenos">197</span><span class="w"> </span><span class="n">PARQUET_THROW_NOT_OK</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="linenos">198</span><span class="w"> </span><span class="o">*</span><span class="n">parquet_table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span><span class="w"> </span><span class="n">outfile</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">));</span>
<span class="linenos">199</span><span class="w"> </span><span class="c1">// (Doc section: Parquet Write)</span>
<span class="linenos">200</span><span class="w"> </span><span class="c1">// (Doc section: Return)</span>
<span class="linenos">201</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">202</span><span class="p">}</span>
<span class="linenos">203</span><span class="c1">// (Doc section: Return)</span>
<span class="linenos">204</span>
<span class="linenos">205</span><span class="c1">// (Doc section: Main)</span>
<span class="linenos">206</span><span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">207</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">st</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">RunMain</span><span class="p">();</span>
<span class="linenos">208</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">st</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">209</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cerr</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">st</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">210</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span>
<span class="linenos">211</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">212</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="linenos">213</span><span class="p">}</span>
<span class="linenos">214</span><span class="c1">// (Doc section: Main)</span>
</pre></div>
</div>
</section>
</section>
</article>
<!-- Previous/next page links that follow the article body. -->
<footer class="prev-next-footer">
  <div class="prev-next-area">
    <a class="left-prev"
       href="basic_arrow.html"
       title="previous page">
      <!-- Decorative arrow: the link text below already names the target,
           so the icon glyph is hidden from assistive technology. -->
      <i class="fa-solid fa-angle-left" aria-hidden="true"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">Basic Arrow Data Structures</p>
      </div>
    </a>
    <a class="right-next"
       href="compute_tutorial.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">Arrow Compute</p>
      </div>
      <!-- Decorative arrow, hidden from assistive technology (see link text). -->
      <i class="fa-solid fa-angle-right" aria-hidden="true"></i>
    </a>
  </div>
</footer>
</div>
<!-- Secondary sidebar: in-page table of contents plus the "Edit on GitHub" link. -->
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
  <div class="sidebar-secondary-item">
    <!-- This heading labels the <nav> below through aria-labelledby. -->
    <div
      id="pst-page-navigation-heading-2"
      class="page-toc tocsection onthispage">
      <!-- Decorative list icon; the adjacent text carries the meaning. -->
      <i class="fa-solid fa-list" aria-hidden="true"></i> On this page
    </div>
    <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
      <ul class="visible nav section-nav flex-column">
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pre-requisites">Pre-requisites</a></li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#setup">Setup</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#includes">Includes</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#main">Main()</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#generating-files-for-reading">Generating Files for Reading</a></li>
        </ul>
        </li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#i-o-with-arrow-files">I/O with Arrow Files</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#opening-a-file">Opening a File</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#opening-an-arrow-file-reader">Opening an Arrow file Reader</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-an-open-arrow-file-to-recordbatch">Reading an Open Arrow File to RecordBatch</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prepare-a-fileoutputstream">Prepare a FileOutputStream</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#write-arrow-file-from-recordbatch">Write Arrow File from RecordBatch</a></li>
        </ul>
        </li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#i-o-with-csv">I/O with CSV</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#opening-a-csv-file">Opening a CSV File</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preparing-a-table">Preparing a Table</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#read-a-csv-file-to-table">Read a CSV File to Table</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#write-a-csv-file-from-table">Write a CSV File from Table</a></li>
        </ul>
        </li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#file-i-o-with-parquet">File I/O with Parquet</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#opening-a-parquet-file">Opening a Parquet File</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#setting-up-a-parquet-reader">Setting up a Parquet Reader</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-a-parquet-file-to-table">Reading a Parquet File to Table</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-a-parquet-file-from-table">Writing a Parquet File from Table</a></li>
        </ul>
        </li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ending-program">Ending Program</a></li>
      </ul>
    </nav></div>
  <div class="sidebar-secondary-item">
    <div class="tocsection editthispage">
      <a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/tutorials/io_tutorial.rst">
        <!-- Decorative pencil icon; the link text "Edit on GitHub" is the accessible name. -->
        <i class="fa-solid fa-pencil" aria-hidden="true"></i>
        Edit on GitHub
      </a>
    </div>
  </div>
</div></div>
</div>
<!-- Footer slot of the main content area; it has no content on this page. -->
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Theme scripts are placed near the end of <body> (not after it) so the
     markup above is parsed before they are fetched and executed. Both files
     are also declared with rel="preload" in <head>. -->
<script src="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<!-- Site-wide page footer: legal notice on the start side, build info on the end side. -->
<footer class="bd-footer">
  <div class="bd-footer__inner bd-page-width">

    <!-- Start side: copyright and Sphinx version. -->
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          © Copyright 2016-2024 Apache Software Foundation.
          Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
          <br>
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
          <br>
        </p>
      </div>
    </div>

    <!-- End side: theme attribution. -->
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
        </p>
      </div>
    </div>

  </div>
</footer>
</body>
</html>