blob: 712b71a423abbc1493c231edb0d7088b33344113 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Input / output and filesystems &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
// Copy the saved colour mode and theme onto <html> as data-mode / data-theme.
// This inline script sits ahead of the stylesheet links that follow it.
(function () {
  var root = document.documentElement;
  // Defaults match the original fallbacks: empty mode, "light" theme.
  var savedMode = "";
  var savedTheme = "light";
  try {
    savedMode = localStorage.getItem("mode") || "";
    savedTheme = localStorage.getItem("theme") || "light";
  } catch (storageError) {
    // Accessing localStorage can throw (e.g. SecurityError when the browser
    // blocks site storage). Keep the defaults instead of aborting the script
    // and leaving the attributes unset.
  }
  root.dataset.mode = savedMode;
  root.dataset.theme = savedTheme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>
// Record which page this is on the shared options object.
// NOTE(review): DOCUMENTATION_OPTIONS is presumably defined by
// documentation_options.js loaded earlier in <head> - confirm.
DOCUMENTATION_OPTIONS['pagename'] = 'cpp/io';
</script>
<script>
// Theme-related settings, written onto the same object in the original order.
Object.assign(DOCUMENTATION_OPTIONS, {
  theme_version: '0.15.2',
  theme_switcher_json_url: '/docs/_static/versions.json',
  theme_switcher_version_match: 'dev/',
  show_version_warning_banner: true,
});
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/io.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Reading and writing the Arrow IPC format" href="ipc.html" />
<link rel="prev" title="Developer’s Guide" href="acero/developer_guide.html" />
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
// Matomo command queue: reuse window._paq if it already exists, else start an empty one.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
[
  ['disableCookies'],
  ['trackPageView'],
  ['enableLinkTracking']
].forEach(function (command) {
  _paq.push(command);
});
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '20']);
  // Build an async <script> for matomo.js and insert it ahead of the first
  // script element currently in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = baseUrl + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<script>/* Dark-theme logo (class "only-dark") is written by script, so it is only added when JavaScript runs; the "only-light" logo before it is static markup. */document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Header search button (icon, "Search" text, Ctrl+K hint). It is emitted with
// document.write, so the button only exists when scripts are enabled.
// NOTE(review): presumably the theme JS wires it to the search overlay form
// (#search-input) - confirm.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
// Header version switcher shell: a dropdown toggle button plus an empty
// listbox, cross-referenced by id through aria-controls / aria-labelledby.
// Per the inline notes in the markup, the button text and the menu entries
// are filled in later by JavaScript.
// NOTE(review): presumably populated from
// DOCUMENTATION_OPTIONS.theme_switcher_json_url - confirm in the theme JS.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Header theme toggle: one button holding three icon spans, tagged
// data-mode="light" / "dark" / "auto". Written with document.write, so the
// control is only present when scripts are enabled.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Second copy of the search button, placed in the
// "navbar-persistent--mobile" container. Same markup as the header copy and
// likewise emitted by script, so it only exists when scripts are enabled.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
// Sidebar copy of the version switcher shell. The markup mirrors the header
// copy but uses the "-3" id suffix (the header copy uses "-2"), keeping the
// button/listbox ids unique within the document.
// Per the inline notes in the markup, the button text and the menu entries
// are filled in later by JavaScript.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Sidebar copy of the theme toggle: one button holding three icon spans,
// tagged data-mode="light" / "dark" / "auto". Written with document.write,
// so the control is only present when scripts are enabled.
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="getting_started.html">Getting Started</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/io_tutorial.html">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="user_guide.html">User Guide</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="streaming_execution.html">Acero: A C++ streaming execution engine</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="acero/overview.html">Acero Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/user_guide.html">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2"><a class="reference internal" href="orc.html">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2"><a class="reference internal" href="csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="api/support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/filesystem.html">Filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/dataset.html">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><a href="user_guide.html" class="nav-link">User Guide</a></li>
<li class="breadcrumb-item active" aria-current="page">Input /...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="input-output-and-filesystems">
<h1>Input / output and filesystems<a class="headerlink" href="#input-output-and-filesystems" title="Permalink to this heading">#</a></h1>
<p>Arrow provides a range of C++ interfaces abstracting the concrete details
of input / output operations. They operate on streams of untyped binary data.
Those abstractions are used for various purposes such as reading CSV or
Parquet data, transmitting IPC streams, and more.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="api/io.html"><span class="doc">API reference for input/output facilities</span></a>.</p>
</div>
<section id="reading-binary-data">
<h2>Reading binary data<a class="headerlink" href="#reading-binary-data" title="Permalink to this heading">#</a></h2>
<p>Interfaces for reading binary data come in two flavours:</p>
<ul class="simple">
<li><p>Sequential reading: the <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io11InputStreamE" title="arrow::io::InputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">InputStream</span></code></a> interface provides
<code class="docutils literal notranslate"><span class="pre">Read</span></code> methods; it is recommended to <code class="docutils literal notranslate"><span class="pre">Read</span></code> to a <code class="docutils literal notranslate"><span class="pre">Buffer</span></code> as it
may in some cases avoid a memory copy.</p></li>
<li><p>Random access reading: the <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io16RandomAccessFileE" title="arrow::io::RandomAccessFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RandomAccessFile</span></code></a> interface
provides additional facilities for positioning and, most importantly,
the <code class="docutils literal notranslate"><span class="pre">ReadAt</span></code> methods which allow parallel reading from multiple threads.</p></li>
</ul>
<p>Concrete implementations are available for <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io12BufferReaderE" title="arrow::io::BufferReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">in-memory</span> <span class="pre">reads</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io12ReadableFileE" title="arrow::io::ReadableFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">unbuffered</span> <span class="pre">file</span> <span class="pre">reads</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io16MemoryMappedFileE" title="arrow::io::MemoryMappedFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">memory-mapped</span> <span class="pre">file</span> <span class="pre">reads</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io19BufferedInputStreamE" title="arrow::io::BufferedInputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">buffered</span> <span class="pre">reads</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io21CompressedInputStreamE" title="arrow::io::CompressedInputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">compressed</span> <span class="pre">reads</span></code></a>.</p>
</section>
<section id="writing-binary-data">
<h2>Writing binary data<a class="headerlink" href="#writing-binary-data" title="Permalink to this heading">#</a></h2>
<p>Writing binary data is mostly done through the <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OutputStream</span></code></a>
interface.</p>
<p>Concrete implementations are available for <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io18BufferOutputStreamE" title="arrow::io::BufferOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">in-memory</span> <span class="pre">writes</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io16FileOutputStreamE" title="arrow::io::FileOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">unbuffered</span> <span class="pre">file</span> <span class="pre">writes</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io16MemoryMappedFileE" title="arrow::io::MemoryMappedFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">memory-mapped</span> <span class="pre">file</span> <span class="pre">writes</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io20BufferedOutputStreamE" title="arrow::io::BufferedOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">buffered</span> <span class="pre">writes</span></code></a>,
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io22CompressedOutputStreamE" title="arrow::io::CompressedOutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">compressed</span> <span class="pre">writes</span></code></a>.</p>
</section>
<section id="filesystems">
<span id="cpp-filesystems"></span><h2>Filesystems<a class="headerlink" href="#filesystems" title="Permalink to this heading">#</a></h2>
<p>The <a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">filesystem</span> <span class="pre">interface</span></code></a> allows abstracted access over
various data storage backends such as the local filesystem or an S3 bucket.
It provides input and output streams as well as directory operations.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="api/filesystem.html#cpp-api-filesystems"><span class="std std-ref">Filesystems API reference</span></a>.</p>
</div>
<p>The filesystem interface exposes a simplified view of the underlying data
storage. Data paths are represented as <em>abstract paths</em>, which are
<code class="docutils literal notranslate"><span class="pre">/</span></code>-separated, even on Windows, and shouldn’t include special path
components such as <code class="docutils literal notranslate"><span class="pre">.</span></code> and <code class="docutils literal notranslate"><span class="pre">..</span></code>. Symbolic links, if supported by the
underlying storage, are automatically dereferenced. Only basic
<code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">metadata</span></code> about file entries, such as the file size
and modification time, is made available.</p>
<p>Filesystem instances can be constructed from URI strings using one of the
<a class="reference internal" href="api/filesystem.html#filesystem-factory-functions"><span class="std std-ref">FromUri factories</span></a>, which dispatch to
implementation-specific factories based on the URI’s <code class="docutils literal notranslate"><span class="pre">scheme</span></code>. Other properties
for the new instance are extracted from the URI’s remaining components, such as the
<code class="docutils literal notranslate"><span class="pre">hostname</span></code>, <code class="docutils literal notranslate"><span class="pre">username</span></code>, etc. Arrow supports runtime registration of new
filesystems, and provides built-in support for several filesystems.</p>
<p>Which built-in filesystems are supported is configured at build time and may include
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs15LocalFileSystemE" title="arrow::fs::LocalFileSystem"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">local</span> <span class="pre">filesystem</span> <span class="pre">access</span></code></a>,
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs16HadoopFileSystemE" title="arrow::fs::HadoopFileSystem"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">HDFS</span></code></a>,
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs12S3FileSystemE" title="arrow::fs::S3FileSystem"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Amazon</span> <span class="pre">S3-compatible</span> <span class="pre">storage</span></code></a> and
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs13GcsFileSystemE" title="arrow::fs::GcsFileSystem"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Google</span> <span class="pre">Cloud</span> <span class="pre">Storage</span></code></a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Tasks that use filesystems will typically run on the
<a class="reference internal" href="threading.html#io-thread-pool"><span class="std std-ref">I/O thread pool</span></a>. For filesystems that support high levels
of concurrency, you may benefit from increasing the size of the I/O thread pool.</p>
</div>
</section>
<section id="defining-new-filesystems">
<h2>Defining new filesystems<a class="headerlink" href="#defining-new-filesystems" title="Permalink to this heading">#</a></h2>
<p>Support for additional URI schemes can be added to the
<a class="reference internal" href="api/filesystem.html#filesystem-factory-functions"><span class="std std-ref">FromUri factories</span></a>
by registering a factory for each new URI scheme with
<code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">RegisterFileSystemFactory()</span></code>. To enable the common case
wherein it is preferred that registration be automatic, an instance of
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs19FileSystemRegistrarE" title="arrow::fs::FileSystemRegistrar"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">FileSystemRegistrar</span></code></a> can be defined at namespace
scope, which will register a factory whenever the instance is loaded:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">kExampleFileSystemModule</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ARROW_REGISTER_FILESYSTEM</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;example&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">[](</span><span class="k">const</span><span class="w"> </span><span class="n">Uri</span><span class="o">&amp;</span><span class="w"> </span><span class="n">uri</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">io</span><span class="o">::</span><span class="n">IOContext</span><span class="o">&amp;</span><span class="w"> </span><span class="n">io_context</span><span class="p">,</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">*</span><span class="w"> </span><span class="n">out_path</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">EnsureExampleFileSystemInitialized</span><span class="p">();</span>
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ExampleFileSystem</span><span class="o">&gt;</span><span class="p">();</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="o">&amp;</span><span class="n">EnsureExampleFileSystemFinalized</span>
<span class="p">);</span>
</pre></div>
</div>
<p>If a filesystem implementation requires initialization before any instances
may be constructed, this should be included in the corresponding factory or
otherwise automatically ensured before the factory is invoked. Likewise, if
a filesystem implementation requires teardown before the process ends, this
can be wrapped in a function and registered alongside the factory. All
finalizers will be called by <a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs15EnsureFinalizedEv" title="arrow::fs::EnsureFinalized"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">EnsureFinalized()</span></code></a>.</p>
<p>Build complexity can be decreased by compartmentalizing a filesystem
implementation into a separate shared library, which applications may
link or load dynamically. Arrow’s built-in filesystem implementations
also follow this pattern. If a shared library containing instances of
<a class="reference internal" href="api/filesystem.html#_CPPv4N5arrow2fs19FileSystemRegistrarE" title="arrow::fs::FileSystemRegistrar"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">FileSystemRegistrar</span></code></a> must be dynamically loaded,
<code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">LoadFileSystemFactories()</span></code> should be used to load it.
If such a library might link statically to Arrow, it
should have exactly one of its sources
<code class="docutils literal notranslate"><span class="pre">#include</span> <span class="pre">&quot;arrow/filesystem/filesystem_library.h&quot;</span></code>
in order to ensure the presence of the symbol on which
<code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">LoadFileSystemFactories()</span></code> depends.</p>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="acero/developer_guide.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Developer’s Guide</p>
</div>
</a>
<a class="right-next"
href="ipc.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Reading and writing the Arrow IPC format</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-binary-data">Reading binary data</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-binary-data">Writing binary data</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#filesystems">Filesystems</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#defining-new-filesystems">Defining new filesystems</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/io.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>