<!-- blob: 18334a6a5f07f67a126eca9a861b54af30b4849e [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Reading and Writing CSV files &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
// Copy the visitor's saved colour mode and theme onto <html> as data-mode /
// data-theme. This inline script sits ahead of the stylesheet links in <head>.
(function () {
  var root = document.documentElement;
  // Defaults used when nothing is stored (same fallbacks as before).
  var savedMode = "";
  var savedTheme = "light";
  try {
    savedMode = localStorage.getItem("mode") || "";
    savedTheme = localStorage.getItem("theme") || "light";
  } catch (err) {
    // Touching localStorage throws (SecurityError) when the browser blocks
    // site storage; keep the defaults above instead of aborting the script
    // and leaving data-mode / data-theme unset.
  }
  root.dataset.mode = savedMode;
  root.dataset.theme = savedTheme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'cpp/csv';</script>
<script>
// Attach the theme settings (theme version, version-switcher JSON location and
// match key, version-warning banner flag) to the existing
// DOCUMENTATION_OPTIONS global.
Object.assign(DOCUMENTATION_OPTIONS, {
  theme_version: '0.15.2',
  theme_switcher_json_url: '/docs/_static/versions.json',
  theme_switcher_version_match: 'dev/',
  show_version_warning_banner: true,
});
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/csv.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Reading JSON files" href="json.html" />
<link rel="prev" title="Reading and writing Parquet files" href="parquet.html" />
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
// Matomo command queue; reuse window._paq if it already exists.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* Cookie tracking is explicitly disabled to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Create an async <script> for matomo.js and insert it ahead of the first
  // <script> element currently in the document.
  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<!-- The only-dark logo variant is written by script, so it exists only when JavaScript runs; the only-light logo before it is static markup. -->
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<!-- Header search button. It is emitted with document.write, so it is only present when JavaScript runs. Shows a magnifier icon, the text "Search" and a Ctrl+K shortcut hint. -->
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<!-- Header version switcher (button id pst-version-switcher-button-2, list id pst-version-switcher-list-2). Written by script; per the notes inside the template, the list starts empty and is populated by JavaScript on page load, and the button text may be changed later. -->
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<!-- Header light/dark theme button. Written by script, so it is absent without JavaScript. Holds one icon span per data-mode value: light, dark and auto. -->
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<!-- Second copy of the search button, placed in the mobile persistent container. It is emitted with document.write, so it is only present when JavaScript runs. -->
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<!-- Sidebar version switcher (button id pst-version-switcher-button-3, list id pst-version-switcher-list-3). Written by script; per the notes inside the template, the list starts empty and is populated by JavaScript on page load, and the button text may be changed later. -->
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<!-- Sidebar light/dark theme button. Written by script, so it is absent without JavaScript. Holds one icon span per data-mode value: light, dark and auto. -->
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="getting_started.html">Getting Started</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/io_tutorial.html">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="user_guide.html">User Guide</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="streaming_execution.html">Acero: A C++ streaming execution engine</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="acero/overview.html">Acero Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/user_guide.html">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="io.html">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2"><a class="reference internal" href="orc.html">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="api/support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/filesystem.html">Filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/dataset.html">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><a href="user_guide.html" class="nav-link">User Guide</a></li>
<li class="breadcrumb-item active" aria-current="page">Reading and...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="reading-and-writing-csv-files">
<h1>Reading and Writing CSV files<a class="headerlink" href="#reading-and-writing-csv-files" title="Permalink to this heading">#</a></h1>
<p>Arrow provides a fast CSV reader allowing ingestion of external data
to create Arrow Tables or a stream of Arrow RecordBatches.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="api/formats.html#cpp-api-csv"><span class="std std-ref">CSV reader/writer API reference</span></a>.</p>
</div>
<section id="reading-csv-files">
<h2>Reading CSV files<a class="headerlink" href="#reading-csv-files" title="Permalink to this heading">#</a></h2>
<p>Data in a CSV file can either be read in as a single Arrow Table using
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> or streamed as RecordBatches using
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv15StreamingReaderE" title="arrow::csv::StreamingReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">StreamingReader</span></code></a>. See <a class="reference internal" href="#cpp-csv-tradeoffs"><span class="std std-ref">Tradeoffs</span></a> for a
discussion of the tradeoffs between the two methods.</p>
<p>Both these readers require an <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io11InputStreamE" title="arrow::io::InputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::io::InputStream</span></code></a> instance
representing the input file. Their behavior can be customized using a
combination of <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptionsE" title="arrow::csv::ReadOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ReadOptions</span></code></a>,
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv12ParseOptionsE" title="arrow::csv::ParseOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ParseOptions</span></code></a>, and <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptionsE" title="arrow::csv::ConvertOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ConvertOptions</span></code></a>.</p>
<section id="tablereader">
<h3>TableReader<a class="headerlink" href="#tablereader" title="Permalink to this heading">#</a></h3>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&quot;arrow/csv/api.h&quot;</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">IOContext</span><span class="w"> </span><span class="n">io_context</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">default_io_context</span><span class="p">();</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">InputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">input</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">read_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ReadOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">parse_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ParseOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">convert_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ConvertOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// Instantiate TableReader from input stream and options</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_reader</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">TableReader</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">io_context</span><span class="p">,</span>
<span class="w"> </span><span class="n">input</span><span class="p">,</span>
<span class="w"> </span><span class="n">read_options</span><span class="p">,</span>
<span class="w"> </span><span class="n">parse_options</span><span class="p">,</span>
<span class="w"> </span><span class="n">convert_options</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_reader</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle TableReader instantiation error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">TableReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">maybe_reader</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Read table from CSV file</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="o">-&gt;</span><span class="n">Read</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_table</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle CSV read error</span>
<span class="w"> </span><span class="c1">// (for example a CSV syntax error or failed type conversion)</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">maybe_table</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="streamingreader">
<h3>StreamingReader<a class="headerlink" href="#streamingreader" title="Permalink to this heading">#</a></h3>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&quot;arrow/csv/api.h&quot;</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">IOContext</span><span class="w"> </span><span class="n">io_context</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">default_io_context</span><span class="p">();</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">InputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">input</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">read_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ReadOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">parse_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ParseOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">convert_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">ConvertOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// Instantiate StreamingReader from input stream and options</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_reader</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">StreamingReader</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">io_context</span><span class="p">,</span>
<span class="w"> </span><span class="n">input</span><span class="p">,</span>
<span class="w"> </span><span class="n">read_options</span><span class="p">,</span>
<span class="w"> </span><span class="n">parse_options</span><span class="p">,</span>
<span class="w"> </span><span class="n">convert_options</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_reader</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle StreamingReader instantiation error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">StreamingReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">maybe_reader</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Set aside a RecordBatch pointer for re-use while streaming</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batch</span><span class="p">;</span>
<span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="nb">true</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Attempt to read the next RecordBatch</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="o">-&gt;</span><span class="n">ReadNext</span><span class="p">(</span><span class="o">&amp;</span><span class="n">batch</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">status</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle read error</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">batch</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="nb">NULL</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle end of file</span>
<span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="c1">// Do something with the batch</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="tradeoffs">
<span id="cpp-csv-tradeoffs"></span><h3>Tradeoffs<a class="headerlink" href="#tradeoffs" title="Permalink to this heading">#</a></h3>
<p>The choice between using <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> or
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv15StreamingReaderE" title="arrow::csv::StreamingReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">StreamingReader</span></code></a> will ultimately depend on the use case
but there are a few tradeoffs to be aware of:</p>
<ol class="arabic simple">
<li><p><strong>Memory usage:</strong> <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> loads all of the data
into memory at once and, depending on the amount of data, may require
considerably more memory than <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv15StreamingReaderE" title="arrow::csv::StreamingReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">StreamingReader</span></code></a> which
only loads one <a class="reference internal" href="api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a> at a time. This is likely to be
the most significant tradeoff for users.</p></li>
<li><p><strong>Speed:</strong> When reading the entire contents of a CSV,
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> will tend to be faster than
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv15StreamingReaderE" title="arrow::csv::StreamingReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">StreamingReader</span></code></a> because it makes better use of
available cores. See <a class="reference internal" href="#cpp-csv-performance"><span class="std std-ref">Performance</span></a> for more
details.</p></li>
<li><p><strong>Flexibility:</strong> <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv15StreamingReaderE" title="arrow::csv::StreamingReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">StreamingReader</span></code></a> might be considered
less flexible than <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> because it performs type
inference only on the first block that’s read in, after which point the types
are frozen and any data in subsequent blocks that cannot be converted to
those types will cause an error. Note that this can be remedied either by
setting <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions10block_sizeE" title="arrow::csv::ReadOptions::block_size"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::block_size</span></code></a> to a large enough value or by using
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions12column_typesE" title="arrow::csv::ConvertOptions::column_types"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::column_types</span></code></a> to set the desired data types
explicitly.</p></li>
</ol>
</section>
</section>
<section id="writing-csv-files">
<h2>Writing CSV files<a class="headerlink" href="#writing-csv-files" title="Permalink to this heading">#</a></h2>
<p>A CSV file is written to an <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OutputStream</span></code></a>.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/csv/api.h&gt;</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// Oneshot write</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">OutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">WriteOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">WriteCSV</span><span class="p">(</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">write_options</span><span class="p">,</span><span class="w"> </span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">()).</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle writer error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// Write incrementally</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">OutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">WriteOptions</span><span class="o">::</span><span class="n">Defaults</span><span class="p">();</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">csv</span><span class="o">::</span><span class="n">MakeCSVWriter</span><span class="p">(</span><span class="n">output</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">write_options</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_writer</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle writer instantiation error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">RecordBatchWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">maybe_writer</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Write batches...</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">WriteRecordBatch</span><span class="p">(</span><span class="o">*</span><span class="n">batch</span><span class="p">).</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle write error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">().</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle close error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">output</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">().</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle file close error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The writer does not yet support all Arrow types.</p>
</div>
</section>
<section id="column-names">
<h2>Column names<a class="headerlink" href="#column-names" title="Permalink to this heading">#</a></h2>
<p>There are three possible ways to infer column names from the CSV file:</p>
<ul class="simple">
<li><p>By default, the column names are read from the first row in the CSV file</p></li>
<li><p>If <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions12column_namesE" title="arrow::csv::ReadOptions::column_names"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::column_names</span></code></a> is set, it forces the column
names in the table to these values (the first row in the CSV file is
read as data)</p></li>
<li><p>If <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions25autogenerate_column_namesE" title="arrow::csv::ReadOptions::autogenerate_column_names"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::autogenerate_column_names</span></code></a> is true, column names
will be autogenerated with the pattern “f0”, “f1”… (the first row in the
CSV file is read as data)</p></li>
</ul>
</section>
<section id="column-selection">
<h2>Column selection<a class="headerlink" href="#column-selection" title="Permalink to this heading">#</a></h2>
<p>By default, Arrow reads all columns in the CSV file. You can narrow the
selection of columns with the <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions15include_columnsE" title="arrow::csv::ConvertOptions::include_columns"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::include_columns</span></code></a>
option. If some columns in <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions15include_columnsE" title="arrow::csv::ConvertOptions::include_columns"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::include_columns</span></code></a>
are missing from the CSV file, an error will be emitted unless
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions23include_missing_columnsE" title="arrow::csv::ConvertOptions::include_missing_columns"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::include_missing_columns</span></code></a> is true, in which case
the missing columns are assumed to contain all-null values.</p>
<section id="interaction-with-column-names">
<h3>Interaction with column names<a class="headerlink" href="#interaction-with-column-names" title="Permalink to this heading">#</a></h3>
<p>If both <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions12column_namesE" title="arrow::csv::ReadOptions::column_names"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::column_names</span></code></a> and
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions15include_columnsE" title="arrow::csv::ConvertOptions::include_columns"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::include_columns</span></code></a> are specified,
the <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions12column_namesE" title="arrow::csv::ReadOptions::column_names"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::column_names</span></code></a> are assumed to map to CSV columns,
and <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions15include_columnsE" title="arrow::csv::ConvertOptions::include_columns"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::include_columns</span></code></a> is a subset of those column
names that will be part of the Arrow Table.</p>
</section>
</section>
<section id="data-types">
<h2>Data types<a class="headerlink" href="#data-types" title="Permalink to this heading">#</a></h2>
<p>By default, the CSV reader infers the most appropriate data type for each
column. Type inference considers the following data types, in order:</p>
<ul class="simple">
<li><p>Null</p></li>
<li><p>Int64</p></li>
<li><p>Boolean</p></li>
<li><p>Date32</p></li>
<li><p>Time32 (with seconds unit)</p></li>
<li><p>Timestamp (with seconds unit)</p></li>
<li><p>Timestamp (with nanoseconds unit)</p></li>
<li><p>Float64</p></li>
<li><p>Dictionary&lt;String&gt; (if <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions16auto_dict_encodeE" title="arrow::csv::ConvertOptions::auto_dict_encode"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::auto_dict_encode</span></code></a> is true)</p></li>
<li><p>Dictionary&lt;Binary&gt; (if <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions16auto_dict_encodeE" title="arrow::csv::ConvertOptions::auto_dict_encode"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::auto_dict_encode</span></code></a> is true)</p></li>
<li><p>String</p></li>
<li><p>Binary</p></li>
</ul>
<p>It is possible to override type inference for select columns by setting
the <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions12column_typesE" title="arrow::csv::ConvertOptions::column_types"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::column_types</span></code></a> option. Explicit data types
can be chosen from the following list:</p>
<ul class="simple">
<li><p>Null</p></li>
<li><p>All Integer types</p></li>
<li><p>Float32 and Float64</p></li>
<li><p>Decimal128</p></li>
<li><p>Boolean</p></li>
<li><p>Date32 and Date64</p></li>
<li><p>Time32 and Time64</p></li>
<li><p>Timestamp</p></li>
<li><p>Binary and Large Binary</p></li>
<li><p>String and Large String (with optional UTF8 input validation)</p></li>
<li><p>Fixed-Size Binary</p></li>
<li><p>Dictionary with index type Int32 and value type one of the following:
Binary, String, LargeBinary, LargeString, Int32, UInt32, Int64, UInt64,
Float32, Float64, Decimal128</p></li>
</ul>
<p>Other data types do not support conversion from CSV values and will error out.</p>
<section id="dictionary-inference">
<h3>Dictionary inference<a class="headerlink" href="#dictionary-inference" title="Permalink to this heading">#</a></h3>
<p>If type inference is enabled and <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions16auto_dict_encodeE" title="arrow::csv::ConvertOptions::auto_dict_encode"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::auto_dict_encode</span></code></a>
is true, the CSV reader first tries to convert string-like columns to a
dictionary-encoded string-like array. It switches to a plain string-like
array when the threshold in <code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::auto_dict_max_cardinality</span></code>
is reached.</p>
</section>
<section id="timestamp-inference-parsing">
<h3>Timestamp inference/parsing<a class="headerlink" href="#timestamp-inference-parsing" title="Permalink to this heading">#</a></h3>
<p>If type inference is enabled, the CSV reader first tries to interpret
string-like columns as timestamps. If all rows have some zone offset
(e.g. <code class="docutils literal notranslate"><span class="pre">Z</span></code> or <code class="docutils literal notranslate"><span class="pre">+0100</span></code>), even if the offsets are inconsistent, then the
inferred type will be UTC timestamp. If no rows have a zone offset, then the
inferred type will be timestamp without timezone. A mix of rows with/without
offsets will result in a string column.</p>
<p>If the type is explicitly specified as a timestamp with/without timezone, then
the reader will error on values without/with zone offsets in that column. Note
that this means it isn’t currently possible to have the reader parse a column
of timestamps without zone offsets as local times in a particular timezone;
instead, parse the column as timestamp without timezone, then convert the
values afterwards using the <code class="docutils literal notranslate"><span class="pre">assume_timezone</span></code> compute function.</p>
<table class="table">
<thead>
<tr class="row-odd"><th class="head"><p>Specified Type</p></th>
<th class="head"><p>Input CSV</p></th>
<th class="head"><p>Result Type</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td rowspan="4"><p>(inferred)</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00</span></code></p></td>
<td><p>timestamp[s]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00Z</span></code></p></td>
<td rowspan="2"><p>timestamp[s, UTC]</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00+0100</span></code></p></td>
</tr>
<tr class="row-odd"><td><div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span>
<span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span><span class="n">Z</span>
</pre></div>
</div>
</td>
<td><p>string</p></td>
</tr>
<tr class="row-even"><td rowspan="4"><p>timestamp[s]</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00</span></code></p></td>
<td><p>timestamp[s]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00Z</span></code></p></td>
<td rowspan="3"><p>(error)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00+0100</span></code></p></td>
</tr>
<tr class="row-odd"><td><div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span>
<span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span><span class="n">Z</span>
</pre></div>
</div>
</td>
</tr>
<tr class="row-even"><td rowspan="4"><p>timestamp[s, UTC]</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00</span></code></p></td>
<td><p>(error)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00Z</span></code></p></td>
<td rowspan="2"><p>timestamp[s, UTC]</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00+0100</span></code></p></td>
</tr>
<tr class="row-odd"><td><div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span>
<span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span><span class="n">Z</span>
</pre></div>
</div>
</td>
<td><p>(error)</p></td>
</tr>
<tr class="row-even"><td rowspan="4"><p>timestamp[s,
America/New_York]</p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00</span></code></p></td>
<td><p>(error)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00Z</span></code></p></td>
<td rowspan="2"><p>timestamp[s,
America/New_York]</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2021-01-01T00:00:00+0100</span></code></p></td>
</tr>
<tr class="row-odd"><td><div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span>
<span class="mi">2021</span><span class="mo">-01-01</span><span class="n">T00</span><span class="o">:</span><span class="mo">00</span><span class="o">:</span><span class="mo">00</span><span class="n">Z</span>
</pre></div>
</div>
</td>
<td><p>(error)</p></td>
</tr>
</tbody>
</table>
</section>
<section id="nulls">
<h3>Nulls<a class="headerlink" href="#nulls" title="Permalink to this heading">#</a></h3>
<p>Null values are recognized from the spellings stored in
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions11null_valuesE" title="arrow::csv::ConvertOptions::null_values"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ConvertOptions::null_values</span></code></a>. The <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv14ConvertOptions8DefaultsEv" title="arrow::csv::ConvertOptions::Defaults"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ConvertOptions::Defaults()</span></code></a>
factory method will initialize a number of conventional null spellings such
as <code class="docutils literal notranslate"><span class="pre">N/A</span></code>.</p>
</section>
<section id="character-encoding">
<h3>Character encoding<a class="headerlink" href="#character-encoding" title="Permalink to this heading">#</a></h3>
<p>CSV files are expected to be encoded in UTF-8. However, non-UTF-8 data
is accepted for Binary columns.</p>
</section>
</section>
<section id="write-options">
<h2>Write Options<a class="headerlink" href="#write-options" title="Permalink to this heading">#</a></h2>
<p>The format of written CSV files can be customized via <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv12WriteOptionsE" title="arrow::csv::WriteOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">WriteOptions</span></code></a>.
Currently, only a few options are available; more will be added in future releases.</p>
</section>
<section id="performance">
<span id="cpp-csv-performance"></span><h2>Performance<a class="headerlink" href="#performance" title="Permalink to this heading">#</a></h2>
<p>By default, <a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11TableReaderE" title="arrow::csv::TableReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableReader</span></code></a> will parallelize reads in order to
exploit all CPU cores on your machine. You can change this setting in
<a class="reference internal" href="api/formats.html#_CPPv4N5arrow3csv11ReadOptions11use_threadsE" title="arrow::csv::ReadOptions::use_threads"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">ReadOptions::use_threads</span></code></a>. A reasonable expectation is at least
100 MB/s per core on a performant desktop or laptop computer (measured in
source CSV bytes, not target Arrow data bytes).</p>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="parquet.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Reading and writing Parquet files</p>
</div>
</a>
<a class="right-next"
href="json.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Reading JSON files</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-csv-files">Reading CSV files</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tablereader">TableReader</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#streamingreader">StreamingReader</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tradeoffs">Tradeoffs</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-csv-files">Writing CSV files</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#column-names">Column names</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#column-selection">Column selection</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#interaction-with-column-names">Interaction with column names</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-types">Data types</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dictionary-inference">Dictionary inference</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#timestamp-inference-parsing">Timestamp inference/parsing</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#nulls">Nulls</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#character-encoding">Character encoding</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#write-options">Write Options</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#performance">Performance</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/csv.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>