<!-- blob: ee29063d1dea3604bd2b576a6ba42f1965a766ce [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Dataset &#8212; Apache Arrow v17.0.0.dev59</title>
<script data-cfasync="false">
// Copy the persisted colour mode and theme from localStorage onto the root
// element as data-mode / data-theme, falling back to "" and "light".
(function () {
  var mode = "";
  var theme = "light";
  try {
    mode = localStorage.getItem("mode") || mode;
    theme = localStorage.getItem("theme") || theme;
  } catch (err) {
    // Accessing localStorage can throw (e.g. SecurityError when storage is
    // blocked); keep the defaults so the data attributes are still set.
  }
  document.documentElement.dataset.mode = mode;
  document.documentElement.dataset.theme = theme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/sphinx_highlight.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script src="../../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'cpp/api/dataset';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/api/dataset.html" />
<link rel="icon" href="../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Java Implementation" href="../../java/index.html" />
<link rel="prev" title="Filesystems" href="filesystem.html" />
<meta name="docsearch:language" content="en"/>
<!--
Matomo analytics (tracker at https://analytics.apache.org/, site id 20).
Commands are queued on window._paq; matomo.js is created as an async script
element and inserted before the first script element of the page.
Cookie tracking is disabled via the disableCookies command.
-->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<!-- Decorative arrow: hidden from assistive technology; the visible text names the button. -->
<i class="fa-solid fa-arrow-up" aria-hidden="true"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../index.html">
<img src="../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev59 - Home"/>
<script>document.write(`<img src="../../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev59 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="../getting_started.html">Getting Started</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="../conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/io_tutorial.html">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../user_guide.html">User Guide</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="../compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../streaming_execution.html">Acero: A C++ streaming execution engine</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../acero/overview.html">Acero Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/user_guide.html">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="../acero/developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../io.html">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../orc.html">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="../env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../api.html">API Reference</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="filesystem.html">Filesystems</a></li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><a href="../api.html" class="nav-link">API Reference</a></li>
<li class="breadcrumb-item active" aria-current="page">Dataset</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="dataset">
<h1>Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h1>
<section id="interface">
<h2>Interface<a class="headerlink" href="#interface" title="Permalink to this heading">#</a></h2>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8FragmentE">
<span id="_CPPv3N5arrow7dataset8FragmentE"></span><span id="_CPPv2N5arrow7dataset8FragmentE"></span><span id="arrow::dataset::Fragment"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Fragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8FragmentE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A granular piece of a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>, such as an individual file. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> can be read/scanned separately from other fragments. It yields a collection of RecordBatches when scanned.</p>
<p>Note that Fragments have well defined physical schemas which are reconciled by the Datasets which contain them; these physical schemas may differ from a parent <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s schema and the physical schemas of sibling Fragments. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">arrow::dataset::FileFragment</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_in_memory_fragment"><span class="std std-ref">arrow::dataset::InMemoryFragment</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment18ReadPhysicalSchemaEv">
<span id="_CPPv3N5arrow7dataset8Fragment18ReadPhysicalSchemaEv"></span><span id="_CPPv2N5arrow7dataset8Fragment18ReadPhysicalSchemaEv"></span><span id="arrow::dataset::Fragment::ReadPhysicalSchema"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1adbc8153a7053d4f88c88e921390e006d"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReadPhysicalSchema</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment18ReadPhysicalSchemaEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the physical schema of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
<p>The physical schema is also called the writer schema. This method is blocking and may suffer from high filesystem latency. The schema is cached after being read once, or may be specified at construction. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::Fragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a216bcd9c413a8ac0faecced93da5ce14"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset8Fragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset8Fragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::Fragment::InspectFragment__FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a9470773ff2b90992a2aabb0f238f37f2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span 
class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Inspect a fragment to learn basic information. </p>
<p>This will be called before a scan, and a fragment should attach whatever information will be needed to figure out an evolution strategy. This information will then be passed to the call to BeginScan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset8Fragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset8Fragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::Fragment::BeginScan__FragmentScanRequestCR.InspectedFragmentCR.FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1ae76e4267d5e7fd5f6f8fd9d882f5156a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentScanner</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BeginScan</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">FragmentScanRequest</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">request</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">InspectedFragment</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">inspected_fragment</span></span>, <span class="k"><span 
class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Start a scan operation. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset8Fragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset8Fragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::Fragment::CountRows__compute::Expression.std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a15766dc34340ee7a9b634a35c07d4885"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">optional</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">int64_t</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CountRows</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" 
title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Count the number of rows in this fragment matching the filter using metadata only. </p>
<p>That is, this method may perform I/O, but will not load data.</p>
<p>If this is not possible, resolve with an empty optional. The fragment can perform I/O (e.g. to read metadata) before deciding whether it can satisfy the request. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset8Fragment20partition_expressionEv">
<span id="_CPPv3NK5arrow7dataset8Fragment20partition_expressionEv"></span><span id="_CPPv2NK5arrow7dataset8Fragment20partition_expressionEv"></span><span id="arrow::dataset::Fragment::partition_expressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a36584dd073ed21462020dd06cf25a8c8"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partition_expression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset8Fragment20partition_expressionEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An expression which evaluates to true for all data viewed by this <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-attributes">Public Static Attributes</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment23kNoPartitionInformationE">
<span id="_CPPv3N5arrow7dataset8Fragment23kNoPartitionInformationE"></span><span id="_CPPv2N5arrow7dataset8Fragment23kNoPartitionInformationE"></span><span id="arrow::dataset::Fragment::kNoPartitionInformation__compute::ExpressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1ad8ade346db1b47ec8d6df3b91804fc29"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kNoPartitionInformation</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment23kNoPartitionInformationE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An expression that represents no known partition information. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7DatasetE">
<span id="_CPPv3N5arrow7dataset7DatasetE"></span><span id="_CPPv2N5arrow7dataset7DatasetE"></span><span id="arrow::dataset::Dataset"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Dataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7DatasetE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A container of zero or more Fragments. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> acts as a union of Fragments, e.g. files deeply nested in a directory. A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> has a schema to which Fragments must align during a scan operation. This is analogous to Avro’s reader and writer schema. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">arrow::dataset::FileSystemDataset</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_in_memory_dataset"><span class="std std-ref">arrow::dataset::InMemoryDataset</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">arrow::dataset::UnionDataset</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset7NewScanEv">
<span id="_CPPv3N5arrow7dataset7Dataset7NewScanEv"></span><span id="_CPPv2N5arrow7dataset7Dataset7NewScanEv"></span><span id="arrow::dataset::Dataset::NewScan"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a1a7cbac64269923faa404d9f50885b61"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14ScannerBuilderE" title="arrow::dataset::ScannerBuilder"><span class="n"><span class="pre">ScannerBuilder</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">NewScan</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset7NewScanEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Begin to build a new Scan operation against this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset12GetFragmentsEN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset7Dataset12GetFragmentsEN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset7Dataset12GetFragmentsEN7compute10ExpressionE"></span><span id="arrow::dataset::Dataset::GetFragments__compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a2861d2e508798427d2f602ef9bc9dbe9"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetFragments</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset12GetFragmentsEN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>GetFragments returns an iterator of Fragments given a predicate. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset17GetFragmentsAsyncEN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset7Dataset17GetFragmentsAsyncEN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset7Dataset17GetFragmentsAsyncEN7compute10ExpressionE"></span><span id="arrow::dataset::Dataset::GetFragmentsAsync__compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a5c72a12d0ec89bf336be1a1a83c2177a"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetFragmentsAsync</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset17GetFragmentsAsyncEN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Async version of <code class="docutils literal notranslate"><span class="pre">GetFragments</span></code>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset20partition_expressionEv">
<span id="_CPPv3NK5arrow7dataset7Dataset20partition_expressionEv"></span><span id="_CPPv2NK5arrow7dataset7Dataset20partition_expressionEv"></span><span id="arrow::dataset::Dataset::partition_expressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a4a9585ba199b1ad2daff59b3981dc675"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partition_expression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset20partition_expressionEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An expression which evaluates to true for all data viewed by this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
<p>May be null, which indicates no information is available. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset7Dataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset7Dataset9type_nameEv"></span><span id="arrow::dataset::Dataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1ac7611e16eb019f295612a9ef428fdfd2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::Dataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1aa615b8333c7ac0e5553adbc9877cb44a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span 
class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset18evolution_strategyEv">
<span id="_CPPv3N5arrow7dataset7Dataset18evolution_strategyEv"></span><span id="_CPPv2N5arrow7dataset7Dataset18evolution_strategyEv"></span><span id="arrow::dataset::Dataset::evolution_strategy"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1ab645e6acad3191131f29c742d7c3dd01"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="n"><span class="pre">DatasetEvolutionStrategy</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="sig-name descname"><span class="n"><span class="pre">evolution_strategy</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset18evolution_strategyEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Rules used by this dataset to handle schema evolution. </p>
</dd></dl>
</div>
</dd></dl>
</section>
<section id="partitioning">
<h2>Partitioning<a class="headerlink" href="#partitioning" title="Permalink to this heading">#</a></h2>
<dl class="cpp enum-class">
<dt class="sig sig-object cpp" id="_CPPv415SegmentEncoding">
<span id="_CPPv315SegmentEncoding"></span><span id="_CPPv215SegmentEncoding"></span><span class="target" id="group__dataset-partitioning_1gab81087db80e3468712d56ab71eaf2964"></span><span class="k"><span class="pre">enum</span></span><span class="w"> </span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">SegmentEncoding</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int8_t</span></span><a class="headerlink" href="#_CPPv415SegmentEncoding" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The encoding of partition segments. </p>
<p><em>Values:</em></p>
<dl class="cpp enumerator">
<dt class="sig sig-object cpp" id="_CPPv4N15SegmentEncoding4NoneE">
<span id="_CPPv3N15SegmentEncoding4NoneE"></span><span id="_CPPv2N15SegmentEncoding4NoneE"></span><span class="target" id="group__dataset-partitioning_1ggab81087db80e3468712d56ab71eaf2964a6adf97f83acf6453d4a6a4b1070f3754"></span><span class="k"><span class="pre">enumerator</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">None</span></span></span><a class="headerlink" href="#_CPPv4N15SegmentEncoding4NoneE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>No encoding. </p>
</dd></dl>
<dl class="cpp enumerator">
<dt class="sig sig-object cpp" id="_CPPv4N15SegmentEncoding3UriE">
<span id="_CPPv3N15SegmentEncoding3UriE"></span><span id="_CPPv2N15SegmentEncoding3UriE"></span><span class="target" id="group__dataset-partitioning_1ggab81087db80e3468712d56ab71eaf2964a3840cd8f73026713059f0ed0562c5493"></span><span class="k"><span class="pre">enumerator</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Uri</span></span></span><a class="headerlink" href="#_CPPv4N15SegmentEncoding3UriE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Segment values are URL-encoded. </p>
</dd></dl>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv424kDefaultHiveNullFallback">
<span id="_CPPv324kDefaultHiveNullFallback"></span><span id="_CPPv224kDefaultHiveNullFallback"></span><span id="kDefaultHiveNullFallback__cA"></span><span class="target" id="group__dataset-partitioning_1ga748eb813cc9d96f51d5a0f534bdf26a1"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultHiveNullFallback</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;__HIVE_DEFAULT_PARTITION__&quot;</span></span><a class="headerlink" href="#_CPPv424kDefaultHiveNullFallback" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The default fallback used for null values in a Hive-style partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4lsRNSt7ostreamE15SegmentEncoding">
<span id="_CPPv3lsRNSt7ostreamE15SegmentEncoding"></span><span id="_CPPv2lsRNSt7ostreamE15SegmentEncoding"></span><span id="lshift-operator__osR.SegmentEncoding"></span><span class="target" id="group__dataset-partitioning_1ga77aaff90ffcdee5250fc2af0460010f0"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ostream</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="k"><span class="pre">operator</span></span><span class="o"><span class="pre">&lt;&lt;</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ostream</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">os</span></span>, <a class="reference internal" href="#_CPPv415SegmentEncoding" title="SegmentEncoding"><span class="n"><span class="pre">SegmentEncoding</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">segment_encoding</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4lsRNSt7ostreamE15SegmentEncoding" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv411StripPrefixRKNSt6stringERKNSt6stringE">
<span id="_CPPv311StripPrefixRKNSt6stringERKNSt6stringE"></span><span id="_CPPv211StripPrefixRKNSt6stringERKNSt6stringE"></span><span id="StripPrefix__ssCR.ssCR"></span><span class="target" id="group__dataset-partitioning_1gae37bc9570d329f5d81ff987be7c2e1ff"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefix</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv411StripPrefixRKNSt6stringERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6stringERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6stringERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6stringERKNSt6stringE"></span><span id="StripPrefixAndFilename__ssCR.ssCR"></span><span class="target" id="group__dataset-partitioning_1ga24e40e5fc8b7f84d097f8a0bcb3d77e0"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv422StripPrefixAndFilenameRKNSt6stringERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Extracts the directory and filename and removes the prefix of a path. </p>
<p>e.g., <code class="docutils literal notranslate"><span class="pre">StripPrefixAndFilename(&quot;/data/year=2019/c.txt&quot;,</span> <span class="pre">&quot;/data&quot;)</span> <span class="pre">-&gt;</span> <span class="pre">{&quot;year=2019&quot;,&quot;c.txt&quot;}</span></code></p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE"></span><span id="StripPrefixAndFilename__std::vector:ss:CR.ssCR"></span><span class="target" id="group__dataset-partitioning_1gabfa1200ecfebac5bdff1239aa1eb8270"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv422StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Vector version of StripPrefixAndFilename. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE"></span><span id="StripPrefixAndFilename__std::vector:fs::FileInfo:CR.ssCR"></span><span class="target" id="group__dataset-partitioning_1ga4f897ffe4c649c15c704853fefd5d60c"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileInfo</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">files</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv422StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Vector version of StripPrefixAndFilename. </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12PartitioningE">
<span id="_CPPv3N5arrow7dataset12PartitioningE"></span><span id="_CPPv2N5arrow7dataset12PartitioningE"></span><span id="arrow::dataset::Partitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Partitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">util</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">EqualityComparable</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12PartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Interface for parsing partition expressions from string partition identifiers. </p>
<p>For example, the identifier “foo=5” might be parsed to an equality expression between the “foo” field and the value 5.</p>
<p>Some partitionings may store the field names in a metadata store instead of in file paths; for example, dataset_root/2009/11/… could be used when the partition fields are “year” and “month”.</p>
<p>Paths are consumed from left to right. Paths must be relative to the root of a partition; path prefixes must be removed before passing the path to a partitioning for parsing. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_function_partitioning"><span class="std std-ref">arrow::dataset::FunctionPartitioning</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_key_value_partitioning"><span class="std std-ref">arrow::dataset::KeyValuePartitioning</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12Partitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset12Partitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset12Partitioning9type_nameEv"></span><span id="arrow::dataset::Partitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1acb1a6614ca4a3e55a2e56d491b80e58b"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12Partitioning9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12Partitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset12Partitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset12Partitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::Partitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1a35a6041a4f4306704d088d05c2d3d490"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12Partitioning5ParseERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12Partitioning6schemaEv">
<span id="_CPPv3NK5arrow7dataset12Partitioning6schemaEv"></span><span id="_CPPv2NK5arrow7dataset12Partitioning6schemaEv"></span><span id="arrow::dataset::Partitioning::schemaC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1ad1bd89877d05e5df2586dda89c21a1f2"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12Partitioning6schemaEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The partition schema. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12Partitioning7DefaultEv">
<span id="_CPPv3N5arrow7dataset12Partitioning7DefaultEv"></span><span id="_CPPv2N5arrow7dataset12Partitioning7DefaultEv"></span><span id="arrow::dataset::Partitioning::Default"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1a157ec7c73980998eda65bb012ad6aaf2"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Default</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset12Partitioning7DefaultEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A default <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> which is a <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a> with an empty schema. </p>
</dd></dl>
</div>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12Partitioning18PartitionedBatchesE">
<span id="_CPPv3N5arrow7dataset12Partitioning18PartitionedBatchesE"></span><span id="_CPPv2N5arrow7dataset12Partitioning18PartitionedBatchesE"></span><span id="arrow::dataset::Partitioning::PartitionedBatches"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_1_1_partitioned_batches"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">PartitionedBatches</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12Partitioning18PartitionedBatchesE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>If the input batch shares any fields with this partitioning, produce sub-batches which satisfy mutually exclusive Expressions. </p>
</dd></dl>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset27KeyValuePartitioningOptionsE">
<span id="_CPPv3N5arrow7dataset27KeyValuePartitioningOptionsE"></span><span id="_CPPv2N5arrow7dataset27KeyValuePartitioningOptionsE"></span><span id="arrow::dataset::KeyValuePartitioningOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_key_value_partitioning_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">KeyValuePartitioningOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset27KeyValuePartitioningOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Options for key-value based partitioning (hive/directory). </p>
<p>Subclassed by <a class="reference internal" href="#structarrow_1_1dataset_1_1_hive_partitioning_options"><span class="std std-ref">arrow::dataset::HivePartitioningOptions</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset27KeyValuePartitioningOptions16segment_encodingE">
<span id="_CPPv3N5arrow7dataset27KeyValuePartitioningOptions16segment_encodingE"></span><span id="_CPPv2N5arrow7dataset27KeyValuePartitioningOptions16segment_encodingE"></span><span id="arrow::dataset::KeyValuePartitioningOptions::segment_encoding__SegmentEncoding"></span><span class="target" id="structarrow_1_1dataset_1_1_key_value_partitioning_options_1a4cbfcbbde484a976ef6bd3c8e2957def"></span><a class="reference internal" href="#_CPPv415SegmentEncoding" title="SegmentEncoding"><span class="n"><span class="pre">SegmentEncoding</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">segment_encoding</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv415SegmentEncoding" title="SegmentEncoding"><span class="n"><span class="pre">SegmentEncoding</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N15SegmentEncoding3UriE" title="SegmentEncoding::Uri"><span class="n"><span class="pre">Uri</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset27KeyValuePartitioningOptions16segment_encodingE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>After splitting a path into components, decode the path components before parsing according to this scheme. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptionsE"></span><span id="arrow::dataset::PartitioningFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Options for inferring a partitioning. </p>
<p>Subclassed by <a class="reference internal" href="#structarrow_1_1dataset_1_1_hive_partitioning_factory_options"><span class="std std-ref">arrow::dataset::HivePartitioningFactoryOptions</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE"></span><span id="arrow::dataset::PartitioningFactoryOptions::infer_dictionary__b"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options_1a113c844746abc258034eb0edb6dd1da6"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">infer_dictionary</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>When inferring a schema for partition fields, yield dictionary-encoded types instead of plain types. </p>
<p>This can be more efficient when materializing virtual columns, and Expressions parsed by the finished <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> will include dictionaries of all unique inspected values for each field. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptions6schemaE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptions6schemaE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptions6schemaE"></span><span id="arrow::dataset::PartitioningFactoryOptions::schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options_1a1078577bc1b2126823c136735a8387b0"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptions6schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Optionally, an expected schema can be provided, in which case inference will only check discovered fields against the schema and update internal state (such as dictionaries). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptions16segment_encodingE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptions16segment_encodingE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptions16segment_encodingE"></span><span id="arrow::dataset::PartitioningFactoryOptions::segment_encoding__SegmentEncoding"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options_1aa8ac2fc1ebcd511a9abbc713be5fd1af"></span><a class="reference internal" href="#_CPPv415SegmentEncoding" title="SegmentEncoding"><span class="n"><span class="pre">SegmentEncoding</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">segment_encoding</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv415SegmentEncoding" title="SegmentEncoding"><span class="n"><span class="pre">SegmentEncoding</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N15SegmentEncoding3UriE" title="SegmentEncoding::Uri"><span class="n"><span class="pre">Uri</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptions16segment_encodingE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>After splitting a path into components, decode the path components before parsing according to this scheme. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset30HivePartitioningFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset30HivePartitioningFactoryOptionsE"></span><span id="arrow::dataset::HivePartitioningFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_hive_partitioning_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioningFactoryOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="arrow::dataset::PartitioningFactoryOptions"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Options for inferring a hive-style partitioning. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE">
<span id="_CPPv3N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE"></span><span id="_CPPv2N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE"></span><span id="arrow::dataset::HivePartitioningFactoryOptions::null_fallback__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_hive_partitioning_factory_options_1a8e48cce66a027ba832654f1593be8b3e"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">null_fallback</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The hive partitioning scheme maps null to a hard-coded fallback string. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19PartitioningFactoryE">
<span id="_CPPv3N5arrow7dataset19PartitioningFactoryE"></span><span id="_CPPv2N5arrow7dataset19PartitioningFactoryE"></span><span id="arrow::dataset::PartitioningFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a> provides creation of a partitioning when the specific schema must be inferred from available paths (no explicit schema is known). </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19PartitioningFactory9type_nameEv">
<span id="_CPPv3NK5arrow7dataset19PartitioningFactory9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset19PartitioningFactory9type_nameEv"></span><span id="arrow::dataset::PartitioningFactory::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1afd3b563fc589b700ef23d2726ac6d518"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19PartitioningFactory9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::PartitioningFactory::Inspect__std::vector:ss:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1a14c6a93085e2b05aadac45d36d7ea483"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span><span class="sig-paren">)</span><span class="w"> </span><span 
class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the schema for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
<p>This may reset internal state, for example dictionaries of unique representations. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::PartitioningFactory::Finish__std::shared_ptr:Schema:CRC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1a5b601acefcfda6236dad628bbce072fc"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span 
class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a partitioning using the provided schema (fields may be dropped). </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20KeyValuePartitioningE">
<span id="_CPPv3N5arrow7dataset20KeyValuePartitioningE"></span><span id="_CPPv2N5arrow7dataset20KeyValuePartitioningE"></span><span id="arrow::dataset::KeyValuePartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_key_value_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">KeyValuePartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Subclass for the common case of a partitioning which yields an equality expression for each segment. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">arrow::dataset::DirectoryPartitioning</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_filename_partitioning"><span class="std std-ref">arrow::dataset::FilenamePartitioning</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_hive_partitioning"><span class="std std-ref">arrow::dataset::HivePartitioning</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::KeyValuePartitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_key_value_partitioning_1a36fd40087fe668a558a520b9a18380b1"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
</div>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20KeyValuePartitioning3KeyE">
<span id="_CPPv3N5arrow7dataset20KeyValuePartitioning3KeyE"></span><span id="_CPPv2N5arrow7dataset20KeyValuePartitioning3KeyE"></span><span id="arrow::dataset::KeyValuePartitioning::Key"></span><span class="target" id="structarrow_1_1dataset_1_1_key_value_partitioning_1_1_key"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Key</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset20KeyValuePartitioning3KeyE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>An unconverted equality expression consisting of a field name and the representation of a scalar value. </p>
</dd></dl>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioningE">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioningE"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioningE"></span><span id="arrow::dataset::DirectoryPartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DirectoryPartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="arrow::dataset::KeyValuePartitioning"><span class="n"><span class="pre">KeyValuePartitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a> parses one segment of a path for each field in its schema. </p>
<p>All fields are required, so paths passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_key_value_partitioning_1a36fd40087fe668a558a520b9a18380b1"><span class="std std-ref">DirectoryPartitioning::Parse</span></a> must contain segments for each field.</p>
<p>For example, given schema&lt;year:int16, month:int8&gt;, the path “/2009/11” would be parsed to (“year”_ == 2009 and “month”_ == 11) </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions"></span><span id="arrow::dataset::DirectoryPartitioning::DirectoryPartitioning__std::shared_ptr:Schema:.ArrayVector.KeyValuePartitioningOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1ad6863864df17c86cb509be940bf57fee"></span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DirectoryPartitioning</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">ArrayVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">dictionaries</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset27KeyValuePartitioningOptionsE" title="arrow::dataset::KeyValuePartitioningOptions"><span class="n"><span class="pre">KeyValuePartitioningOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="w"> </span><span 
class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If a field in schema is of dictionary type, the corresponding element of dictionaries must contain the dictionary of values for that field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21DirectoryPartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset21DirectoryPartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset21DirectoryPartitioning9type_nameEv"></span><span id="arrow::dataset::DirectoryPartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1ad56559bf0912952469c2f7bab7365a1d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21DirectoryPartitioning9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="arrow::dataset::DirectoryPartitioning::MakeFactory__std::vector:ss:.PartitioningFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1af6105a7c349a4e87dd3646ffe1e8c275"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFactory</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">field_names</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="arrow::dataset::PartitioningFactoryOptions"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> 
</span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a factory for a directory partitioning. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>field_names</strong> – <strong>[in]</strong> The names for the partition fields. Types will be inferred. </p>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23HivePartitioningOptionsE">
<span id="_CPPv3N5arrow7dataset23HivePartitioningOptionsE"></span><span id="_CPPv2N5arrow7dataset23HivePartitioningOptionsE"></span><span id="arrow::dataset::HivePartitioningOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_hive_partitioning_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioningOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset27KeyValuePartitioningOptionsE" title="arrow::dataset::KeyValuePartitioningOptions"><span class="n"><span class="pre">KeyValuePartitioningOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset23HivePartitioningOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioningE">
<span id="_CPPv3N5arrow7dataset16HivePartitioningE"></span><span id="_CPPv2N5arrow7dataset16HivePartitioningE"></span><span id="arrow::dataset::HivePartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="arrow::dataset::KeyValuePartitioning"><span class="n"><span class="pre">KeyValuePartitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Multi-level, directory based partitioning originating from Apache Hive with all data files stored in the leaf directories. </p>
<p>Data is partitioned by static values of a particular column in the schema. Partition keys are represented in the form $key=$value in directory names. <a class="reference internal" href="datatype.html#classarrow_1_1_field"><span class="std std-ref">Field</span></a> order is ignored, as are missing or unrecognized field names.</p>
<p>For example, given schema&lt;year:int16, month:int8, day:int8&gt;, the path “/day=321/ignored=3.4/year=2009” parses to (“year”_ == 2009 and “day”_ == 321) </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE">
<span id="_CPPv3N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE"></span><span id="_CPPv2N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE"></span><span id="arrow::dataset::HivePartitioning::HivePartitioning__std::shared_ptr:Schema:.ArrayVector.ss"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1a59b84f3bc2f04df60572a3381d5d9a50"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioning</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">ArrayVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">dictionaries</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">null_fallback</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" 
href="#_CPPv424kDefaultHiveNullFallback" title="kDefaultHiveNullFallback"><span class="n"><span class="pre">kDefaultHiveNullFallback</span></span></a><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If a field in schema is of dictionary type, the corresponding element of dictionaries must contain the dictionary of values for that field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset16HivePartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset16HivePartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset16HivePartitioning9type_nameEv"></span><span id="arrow::dataset::HivePartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1ac8729632a9681195f8c01ac58e2909b3"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset16HivePartitioning9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions">
<span id="_CPPv3N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions"></span><span id="_CPPv2N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions"></span><span id="arrow::dataset::HivePartitioning::MakeFactory__HivePartitioningFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1a4c5fb22aefbf30c6c379cf22c0afb067"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFactory</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE" title="arrow::dataset::HivePartitioningFactoryOptions"><span class="n"><span class="pre">HivePartitioningFactoryOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a factory for a hive partitioning. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20FunctionPartitioningE">
<span id="_CPPv3N5arrow7dataset20FunctionPartitioningE"></span><span id="_CPPv2N5arrow7dataset20FunctionPartitioningE"></span><span id="arrow::dataset::FunctionPartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FunctionPartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset20FunctionPartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Implementation provided by lambda or other callable. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20FunctionPartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset20FunctionPartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset20FunctionPartitioning9type_nameEv"></span><span id="arrow::dataset::FunctionPartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning_1a392a407e50f8e0d5ead69be21edfa435"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20FunctionPartitioning9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::FunctionPartitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning_1a8d256b444375d55b4265ffc524cb15b3"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20FilenamePartitioningE">
<span id="_CPPv3N5arrow7dataset20FilenamePartitioningE"></span><span id="_CPPv2N5arrow7dataset20FilenamePartitioningE"></span><span id="arrow::dataset::FilenamePartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_filename_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FilenamePartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="arrow::dataset::KeyValuePartitioning"><span class="n"><span class="pre">KeyValuePartitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset20FilenamePartitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20FilenamePartitioning20FilenamePartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions">
<span id="_CPPv3N5arrow7dataset20FilenamePartitioning20FilenamePartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions"></span><span id="_CPPv2N5arrow7dataset20FilenamePartitioning20FilenamePartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions"></span><span id="arrow::dataset::FilenamePartitioning::FilenamePartitioning__std::shared_ptr:Schema:.ArrayVector.KeyValuePartitioningOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_filename_partitioning_1a8f2103fe5c71b9cf31b88279edd5d13a"></span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FilenamePartitioning</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">ArrayVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">dictionaries</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset27KeyValuePartitioningOptionsE" title="arrow::dataset::KeyValuePartitioningOptions"><span class="n"><span class="pre">KeyValuePartitioningOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="w"> </span><span class="p"><span 
class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset20FilenamePartitioning20FilenamePartitioningENSt10shared_ptrI6SchemaEE11ArrayVector27KeyValuePartitioningOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_filename_partitioning"><span class="std std-ref">FilenamePartitioning</span></a> from its components. </p>
<p>If a field in schema is of dictionary type, the corresponding element of dictionaries must contain the dictionary of values for that field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20FilenamePartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset20FilenamePartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset20FilenamePartitioning9type_nameEv"></span><span id="arrow::dataset::FilenamePartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_filename_partitioning_1aac4463bed4186540eab43c478cbc922d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20FilenamePartitioning9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20FilenamePartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions">
<span id="_CPPv3N5arrow7dataset20FilenamePartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="_CPPv2N5arrow7dataset20FilenamePartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="arrow::dataset::FilenamePartitioning::MakeFactory__std::vector:ss:.PartitioningFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_filename_partitioning_1a064e382b8952d4d180e64bf20b32714a"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFactory</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">field_names</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="arrow::dataset::PartitioningFactoryOptions"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> 
</span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset20FilenamePartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a factory for a filename partitioning. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>field_names</strong> – <strong>[in]</strong> The names for the partition fields. Types will be inferred. </p>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21PartitioningOrFactoryE">
<span id="_CPPv3N5arrow7dataset21PartitioningOrFactoryE"></span><span id="_CPPv2N5arrow7dataset21PartitioningOrFactoryE"></span><span id="arrow::dataset::PartitioningOrFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningOrFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21PartitioningOrFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/partition.h&gt;</em></div>
<p>Either a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> or a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21PartitioningOrFactory12partitioningEv">
<span id="_CPPv3NK5arrow7dataset21PartitioningOrFactory12partitioningEv"></span><span id="_CPPv2NK5arrow7dataset21PartitioningOrFactory12partitioningEv"></span><span id="arrow::dataset::PartitioningOrFactory::partitioningC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1adf893490b52fd3761f75056d360aa040"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21PartitioningOrFactory12partitioningEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The partitioning (if given). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21PartitioningOrFactory7factoryEv">
<span id="_CPPv3NK5arrow7dataset21PartitioningOrFactory7factoryEv"></span><span id="_CPPv2NK5arrow7dataset21PartitioningOrFactory7factoryEv"></span><span id="arrow::dataset::PartitioningOrFactory::factoryC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1aeaadd56fdfb179bc6b53e8fda83b9cd3"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">factory</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21PartitioningOrFactory7factoryEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The partition factory (if given). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::PartitioningOrFactory::GetOrInferSchema__std::vector:ss:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1afa9b03e3ed01361864661d267c7c2318"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetOrInferSchema</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the partition schema, inferring it with the given factory if needed. </p>
</dd></dl>
</div>
</dd></dl>
</section>
<section id="dataset-discovery-factories">
<h2>Dataset discovery/factories<a class="headerlink" href="#dataset-discovery-factories" title="Permalink to this heading">#</a></h2>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptionsE">
<span id="_CPPv3N5arrow7dataset14InspectOptionsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptionsE"></span><span id="arrow::dataset::InspectOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptions9fragmentsE">
<span id="_CPPv3N5arrow7dataset14InspectOptions9fragmentsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptions9fragmentsE"></span><span id="arrow::dataset::InspectOptions::fragments__i"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options_1a6a2b84b68816d279b5c3f94a3585b677"></span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptions9fragmentsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate how many fragments should be inspected to infer the unified dataset schema. </p>
<p>Limiting the number of fragments accessed improves the latency of the discovery process when dealing with a high number of fragments and/or high-latency file systems.</p>
<p>The default value of <code class="docutils literal notranslate"><span class="pre">1</span></code> inspects the schema of the first (in no particular order) fragment only. If the dataset has a uniform schema for all fragments, this default is the optimal value. In order to inspect all fragments and robustly unify their potentially varying schemas, set this option to <code class="docutils literal notranslate"><span class="pre">kInspectAllFragments</span></code>. A value of <code class="docutils literal notranslate"><span class="pre">0</span></code> disables inspection of fragments altogether so only the partitioning schema will be inspected. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptions19field_merge_optionsE">
<span id="_CPPv3N5arrow7dataset14InspectOptions19field_merge_optionsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptions19field_merge_optionsE"></span><span id="arrow::dataset::InspectOptions::field_merge_options__Field::MergeOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options_1a5d104725b1311c5f075e114375ede34f"></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow5FieldE" title="arrow::Field"><span class="n"><span class="pre">Field</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow5Field12MergeOptionsE" title="arrow::Field::MergeOptions"><span class="n"><span class="pre">MergeOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">field_merge_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow5FieldE" title="arrow::Field"><span class="n"><span class="pre">Field</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow5Field12MergeOptionsE" title="arrow::Field::MergeOptions"><span class="n"><span class="pre">MergeOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow5Field12MergeOptions8DefaultsEv" title="arrow::Field::MergeOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptions19field_merge_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Control how to unify types. </p>
<p>By default, types are merged strictly (the type must match exactly, except nulls can be merged with other types). </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-attributes">Public Static Attributes</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptions20kInspectAllFragmentsE">
<span id="_CPPv3N5arrow7dataset14InspectOptions20kInspectAllFragmentsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptions20kInspectAllFragmentsE"></span><span id="arrow::dataset::InspectOptions::kInspectAllFragments__i"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options_1ac662ba0cbd1629a62797abc62f584be8"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kInspectAllFragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="o"><span class="pre">-</span></span><span class="m"><span class="pre">1</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptions20kInspectAllFragmentsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>See <code class="docutils literal notranslate"><span class="pre">fragments</span></code> property. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptionsE">
<span id="_CPPv3N5arrow7dataset13FinishOptionsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptionsE"></span><span id="arrow::dataset::FinishOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FinishOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions6schemaE">
<span id="_CPPv3N5arrow7dataset13FinishOptions6schemaE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions6schemaE"></span><span id="arrow::dataset::FinishOptions::schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1a02b6442d0b26184f8016ab2b91d1aa8a"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions6schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Finalize the dataset with this given schema. </p>
<p>If the schema is not provided, the schema is inferred via Inspect; see the <code class="docutils literal notranslate"><span class="pre">inspect_options</span></code> property. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions15inspect_optionsE">
<span id="_CPPv3N5arrow7dataset13FinishOptions15inspect_optionsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions15inspect_optionsE"></span><span id="arrow::dataset::FinishOptions::inspect_options__InspectOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1a14dde92ea1ab7ea05923fb36ba1503d6"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">inspect_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions15inspect_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If the schema is not provided, it will be discovered by passing the following options to <code class="docutils literal notranslate"><span class="pre">DatasetFactory::Inspect</span></code>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions18validate_fragmentsE">
<span id="_CPPv3N5arrow7dataset13FinishOptions18validate_fragmentsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions18validate_fragmentsE"></span><span id="arrow::dataset::FinishOptions::validate_fragments__b"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1ad26a9261c353cdd02860e38903eac807"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">validate_fragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions18validate_fragmentsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the given <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> (when specified) should be validated against the fragments’ schemas. </p>
<p><code class="docutils literal notranslate"><span class="pre">inspect_options</span></code> will control how many fragments are checked. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactoryE">
<span id="_CPPv3N5arrow7dataset14DatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset14DatasetFactoryE"></span><span id="arrow::dataset::DatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DatasetFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a> provides a way to inspect/discover a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s expected schema before materializing said <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">arrow::dataset::FileSystemDatasetFactory</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">arrow::dataset::ParquetDatasetFactory</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset_factory"><span class="std std-ref">arrow::dataset::UnionDatasetFactory</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::DatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a216b18c920df76f8dbe0eeead9ac3ff1"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a 
class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the schemas of the Fragments and <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory7InspectE14InspectOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory7InspectE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory7InspectE14InspectOptions"></span><span id="arrow::dataset::DatasetFactory::Inspect__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a0a15be09625b8c6ac2b9ef1889b6710a"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory7InspectE14InspectOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get unified schema for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishEv">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishEv"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishEv"></span><span id="arrow::dataset::DatasetFactory::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1af69a68f1c04cb70b5f3f2cb42a0643ea"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::DatasetFactory::Finish__std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a05f84109ae8c822b27f63cd3fa289ace"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given schema (see <em>InspectOptions::schema</em>). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::DatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a4744d4cd12db41f9c29433af1fa730b4"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishE13FinishOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given options. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14DatasetFactory14root_partitionEv">
<span id="_CPPv3NK5arrow7dataset14DatasetFactory14root_partitionEv"></span><span id="_CPPv2NK5arrow7dataset14DatasetFactory14root_partitionEv"></span><span id="arrow::dataset::DatasetFactory::root_partitionC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a00f7c257b39d4b90b48a2e4856ed2961"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">root_partition</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14DatasetFactory14root_partitionEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Optional root partition for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory16SetRootPartitionEN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset14DatasetFactory16SetRootPartitionEN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory16SetRootPartitionEN7compute10ExpressionE"></span><span id="arrow::dataset::DatasetFactory::SetRootPartition__compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1ad4c1e7061038814f1e3ea80fb2cc8526"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">SetRootPartition</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">partition</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory16SetRootPartitionEN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the root partition for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
</section>
<section id="scanning">
<h2>Scanning<a class="headerlink" href="#scanning" title="Permalink to this heading">#</a></h2>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv426TaggedRecordBatchGenerator">
<span id="_CPPv326TaggedRecordBatchGenerator"></span><span id="_CPPv226TaggedRecordBatchGenerator"></span><span class="target" id="group__dataset-scanning_1gab483ab439e8b34006f3204005b0724ec"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatchGenerator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Future</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">TaggedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv426TaggedRecordBatchGenerator" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv425TaggedRecordBatchIterator">
<span id="_CPPv325TaggedRecordBatchIterator"></span><span id="_CPPv225TaggedRecordBatchIterator"></span><span class="target" id="group__dataset-scanning_1ga26c872b52e47a7e20f901d743df3bef9"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatchIterator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">Iterator</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">TaggedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv425TaggedRecordBatchIterator" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv430EnumeratedRecordBatchGenerator">
<span id="_CPPv330EnumeratedRecordBatchGenerator"></span><span id="_CPPv230EnumeratedRecordBatchGenerator"></span><span class="target" id="group__dataset-scanning_1ga2aab0007c4c0e475197811f993db724e"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatchGenerator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Future</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">EnumeratedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv430EnumeratedRecordBatchGenerator" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv429EnumeratedRecordBatchIterator">
<span id="_CPPv329EnumeratedRecordBatchIterator"></span><span id="_CPPv229EnumeratedRecordBatchIterator"></span><span class="target" id="group__dataset-scanning_1gac15b45b87696d4c200949df0e7965ea1"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatchIterator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">Iterator</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">EnumeratedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv429EnumeratedRecordBatchIterator" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv417kDefaultBatchSize">
<span id="_CPPv317kDefaultBatchSize"></span><span id="_CPPv217kDefaultBatchSize"></span><span id="kDefaultBatchSize__int64_t"></span><span class="target" id="group__dataset-scanning_1ga073228430616d6b28274a6f3e570e5d7"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultBatchSize</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><span class="w"> </span><span class="o"><span class="pre">&lt;&lt;</span></span><span class="w"> </span><span class="m"><span class="pre">17</span></span><a class="headerlink" href="#_CPPv417kDefaultBatchSize" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv422kDefaultBatchReadahead">
<span id="_CPPv322kDefaultBatchReadahead"></span><span id="_CPPv222kDefaultBatchReadahead"></span><span id="kDefaultBatchReadahead__int32_t"></span><span class="target" id="group__dataset-scanning_1ga3eb0d0d3032ccacc02cbfe0a94ca5d6c"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultBatchReadahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">16</span></span><a class="headerlink" href="#_CPPv422kDefaultBatchReadahead" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv425kDefaultFragmentReadahead">
<span id="_CPPv325kDefaultFragmentReadahead"></span><span id="_CPPv225kDefaultFragmentReadahead"></span><span id="kDefaultFragmentReadahead__int32_t"></span><span class="target" id="group__dataset-scanning_1ga6f4b8db3cb8c1bcfc6874453d7ba40b4"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultFragmentReadahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">4</span></span><a class="headerlink" href="#_CPPv425kDefaultFragmentReadahead" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv422kDefaultBytesReadahead">
<span id="_CPPv322kDefaultBytesReadahead"></span><span id="_CPPv222kDefaultBytesReadahead"></span><span id="kDefaultBytesReadahead__int32_t"></span><span class="target" id="group__dataset-scanning_1ga23eed0bd54b0590109db95515d2c9e3b"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultBytesReadahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><span class="w"> </span><span class="o"><span class="pre">&lt;&lt;</span></span><span class="w"> </span><span class="m"><span class="pre">25</span></span><a class="headerlink" href="#_CPPv422kDefaultBytesReadahead" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv413SetProjectionP11ScanOptions15ProjectionDescr">
<span id="_CPPv313SetProjectionP11ScanOptions15ProjectionDescr"></span><span id="_CPPv213SetProjectionP11ScanOptions15ProjectionDescr"></span><span id="SetProjection__ScanOptionsP.ProjectionDescr"></span><span class="target" id="group__dataset-scanning_1ga08756d4571dcd10a6e5e00f7cbed7af8"></span><span class="kt"><span class="pre">void</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">SetProjection</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">ScanOptions</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">ProjectionDescr</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">projection</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv413SetProjectionP11ScanOptions15ProjectionDescr" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Utility method to set the projection expression and schema. </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19FragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset19FragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset19FragmentScanOptionsE"></span><span id="arrow::dataset::FragmentScanOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_scan_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FragmentScanOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/dataset.h&gt;</em></div>
<p>Per-scan options for fragment(s) in a dataset. </p>
<p>These options are not intrinsic to the format or fragment itself, but do affect the results of a scan. These are options which make sense to change between repeated reads of the same dataset, such as format-specific conversion options (that do not affect the schema). </p>
<p>Subclassed by <a class="reference internal" href="#structarrow_1_1dataset_1_1_csv_fragment_scan_options"><span class="std std-ref">arrow::dataset::CsvFragmentScanOptions</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_ipc_fragment_scan_options"><span class="std std-ref">arrow::dataset::IpcFragmentScanOptions</span></a>, <a class="reference internal" href="#structarrow_1_1dataset_1_1_json_fragment_scan_options"><span class="std std-ref">arrow::dataset::JsonFragmentScanOptions</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_fragment_scan_options"><span class="std std-ref">arrow::dataset::ParquetFragmentScanOptions</span></a></p>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptionsE">
<span id="_CPPv3N5arrow7dataset11ScanOptionsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptionsE"></span><span id="arrow::dataset::ScanOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Scan-specific options, which can be changed between scans of the same dataset. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset11ScanOptions18MaterializedFieldsEv">
<span id="_CPPv3NK5arrow7dataset11ScanOptions18MaterializedFieldsEv"></span><span id="_CPPv2NK5arrow7dataset11ScanOptions18MaterializedFieldsEv"></span><span id="arrow::dataset::ScanOptions::MaterializedFieldsC"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a209db8dec6bf9cc38e9f738bce59ccb7"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow8FieldRefE" title="arrow::FieldRef"><span class="n"><span class="pre">FieldRef</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MaterializedFields</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset11ScanOptions18MaterializedFieldsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a vector of FieldRefs that require materialization. </p>
<p>This is usually the union of the fields referenced in the projection and the filter expression. Examples:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">a,</span> <span class="pre">b</span> <span class="pre">WHERE</span> <span class="pre">a</span> <span class="pre">&lt;</span> <span class="pre">2</span> <span class="pre">&amp;&amp;</span> <span class="pre">c</span> <span class="pre">&gt;</span> <span class="pre">1</span></code> =&gt; [“a”, “b”, “a”, “c”]</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">a</span> <span class="pre">+</span> <span class="pre">b</span> <span class="pre">&lt;</span> <span class="pre">3</span> <span class="pre">WHERE</span> <span class="pre">a</span> <span class="pre">&gt;</span> <span class="pre">1</span></code> =&gt; [“a”, “b”, “a”]</p></li>
</ul>
<p>This is needed for expressions where a field may not be directly used in the final projection but is still required to evaluate the expression.</p>
<p>This is used by <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> implementations to apply the column sub-selection optimization. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions6filterE">
<span id="_CPPv3N5arrow7dataset11ScanOptions6filterE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions6filterE"></span><span id="arrow::dataset::ScanOptions::filter__compute::Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a8a50e25edd3bfa9e2a0b2ba896022f20"></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">filter</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">literal</span></span><span class="p"><span class="pre">(</span></span><span class="k"><span class="pre">true</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions6filterE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A row filter (which will be pushed down to partitioning/reading if supported). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10projectionE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10projectionE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10projectionE"></span><span id="arrow::dataset::ScanOptions::projection__compute::Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a8d41950850f20861ac11d4890047ca2f"></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">projection</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10projectionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A projection expression (which can add/remove/rename columns). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions14dataset_schemaE">
<span id="_CPPv3N5arrow7dataset11ScanOptions14dataset_schemaE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions14dataset_schemaE"></span><span id="arrow::dataset::ScanOptions::dataset_schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a6fbf0815c71758c41abfa26b06e0a5e1"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">dataset_schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions14dataset_schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p><a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> with which batches will be read from fragments. </p>
<p>This is also known as the “reader schema”; it will be used (for example) in constructing CSV file readers to identify column types for parsing. Usually only a subset of its fields (see MaterializedFields) will be materialized during a scan. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions16projected_schemaE">
<span id="_CPPv3N5arrow7dataset11ScanOptions16projected_schemaE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions16projected_schemaE"></span><span id="arrow::dataset::ScanOptions::projected_schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a32a9c0b11c73afb80c15686595df8483"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">projected_schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions16projected_schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p><a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> of projected record batches. </p>
<p>This is independent of dataset_schema as its fields are derived from the projection. For example, let</p>
<p>dataset_schema = {“a”: int32, “b”: int32, “id”: utf8}<br />
projection = project({call(“add”, {field_ref(“a”), field_ref(“b”)})}, {“a_plus_b”})</p>
<p>(no filter specified). In this case, the projected_schema would be</p>
<p>{“a_plus_b”: int32} </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10batch_sizeE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10batch_sizeE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10batch_sizeE"></span><span id="arrow::dataset::ScanOptions::batch_size__int64_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a3c28eef3c8804a93c488a4861ad193ee"></span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">batch_size</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv417kDefaultBatchSize" title="kDefaultBatchSize"><span class="n"><span class="pre">kDefaultBatchSize</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10batch_sizeE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Maximum row count for scanned batches. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions15batch_readaheadE">
<span id="_CPPv3N5arrow7dataset11ScanOptions15batch_readaheadE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions15batch_readaheadE"></span><span id="arrow::dataset::ScanOptions::batch_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a129a6281e006cb6681d6d246b0c6a99b"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">batch_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv422kDefaultBatchReadahead" title="kDefaultBatchReadahead"><span class="n"><span class="pre">kDefaultBatchReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions15batch_readaheadE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>How many batches to read ahead within a fragment. </p>
<p>Set to 0 to disable batch readahead</p>
<p>Note: May not be supported by all formats.</p>
<p>Note: Will be ignored if use_threads is set to false. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions18fragment_readaheadE">
<span id="_CPPv3N5arrow7dataset11ScanOptions18fragment_readaheadE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions18fragment_readaheadE"></span><span id="arrow::dataset::ScanOptions::fragment_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a63e5bc6675f808e576841fcf13545435"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragment_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv425kDefaultFragmentReadahead" title="kDefaultFragmentReadahead"><span class="n"><span class="pre">kDefaultFragmentReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions18fragment_readaheadE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>How many files to read ahead. </p>
<p>Set to 0 to disable fragment readahead</p>
<p>Note: May not be enforced by all scanners.</p>
<p>Note: Will be ignored if use_threads is set to false. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions4poolE">
<span id="_CPPv3N5arrow7dataset11ScanOptions4poolE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions4poolE"></span><span id="arrow::dataset::ScanOptions::pool__MemoryPoolP"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a90fc341733356314293b9bf414fa4610"></span><a class="reference internal" href="memory.html#_CPPv4N5arrow10MemoryPoolE" title="arrow::MemoryPool"><span class="n"><span class="pre">MemoryPool</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="sig-name descname"><span class="n"><span class="pre">pool</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="memory.html#_CPPv4N5arrow19default_memory_poolEv" title="arrow::default_memory_pool"><span class="n"><span class="pre">default_memory_pool</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions4poolE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A pool from which materialized and scanned arrays will be allocated. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10io_contextE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10io_contextE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10io_contextE"></span><span id="arrow::dataset::ScanOptions::io_context__io::IOContext"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a2cbac92967c86d75605244a218ca66d1"></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">IOContext</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">io_context</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10io_contextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>IOContext for any IO tasks. </p>
<p>Note: The IOContext executor will be ignored if use_threads is set to false </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions11use_threadsE">
<span id="_CPPv3N5arrow7dataset11ScanOptions11use_threadsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions11use_threadsE"></span><span id="arrow::dataset::ScanOptions::use_threads__b"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a14c08f047d28e5a705ef1e275f11a98a"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">use_threads</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions11use_threadsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If true the scanner will scan in parallel. </p>
<p>Note: If true, this will use threads from both the cpu_executor and the io_context.executor.</p>
<p>Note: This must be true in order for any readahead to happen. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions21fragment_scan_optionsE">
<span id="_CPPv3N5arrow7dataset11ScanOptions21fragment_scan_optionsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions21fragment_scan_optionsE"></span><span id="arrow::dataset::ScanOptions::fragment_scan_options__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a10146664bc67f796084a54c6c961d3c3"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragment_scan_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions21fragment_scan_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Fragment-specific scan options. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions12backpressureE">
<span id="_CPPv3N5arrow7dataset11ScanOptions12backpressureE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions12backpressureE"></span><span id="arrow::dataset::ScanOptions::backpressure__acero::BackpressureOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a4262daa088af1b2022c2e3f6595db99c"></span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero19BackpressureOptionsE" title="arrow::acero::BackpressureOptions"><span class="n"><span class="pre">BackpressureOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">backpressure</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero19BackpressureOptionsE" title="arrow::acero::BackpressureOptions"><span class="n"><span class="pre">BackpressureOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero19BackpressureOptions19DefaultBackpressureEv" title="arrow::acero::BackpressureOptions::DefaultBackpressure"><span class="n"><span class="pre">DefaultBackpressure</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions12backpressureE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parameters which control when the plan should pause for a slow consumer. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2OptionsE">
<span id="_CPPv3N5arrow7dataset13ScanV2OptionsE"></span><span id="_CPPv2N5arrow7dataset13ScanV2OptionsE"></span><span id="arrow::dataset::ScanV2Options"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanV2Options</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero15ExecNodeOptionsE" title="arrow::acero::ExecNodeOptions"><span class="n"><span class="pre">ExecNodeOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2OptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Scan-specific options, which can be changed between scans of the same dataset. </p>
<p>A dataset consists of one or more individual fragments. A fragment is anything that is independently scannable, often a file.</p>
<p>Batches from all fragments will be converted to a single schema. This unified schema is referred to as the “dataset schema” and is the output schema for this node.</p>
<p>Individual fragments may have schemas that are different from the dataset schema. This is sometimes referred to as the physical or fragment schema. Conversion from the fragment schema to the dataset schema is a process known as evolution. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options7datasetE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options7datasetE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options7datasetE"></span><span id="arrow::dataset::ScanV2Options::dataset__std::shared_ptr:Dataset:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1adb58938b8f76b026d41eccdf403e0933"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">dataset</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options7datasetE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The dataset to scan. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options6filterE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options6filterE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options6filterE"></span><span id="arrow::dataset::ScanV2Options::filter__compute::Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1a16ad9fd613777bddd0d2fee703d91630"></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">filter</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">literal</span></span><span class="p"><span class="pre">(</span></span><span class="k"><span class="pre">true</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options6filterE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A row filter. </p>
<p>The filter expression should be written against the dataset schema. The filter must be unbound.</p>
<p>This is an opportunistic pushdown filter. Filtering capabilities will vary between formats. If a format is not capable of applying the filter then it will ignore it.</p>
<p>Each fragment will do its best to filter the data based on the information (partitioning guarantees, statistics) available to it. If it is able to apply some filtering then it will indicate what filtering it was able to apply by attaching a guarantee to the batch.</p>
<p>For example, if a filter is x &lt; 50 &amp;&amp; y &gt; 40 then a batch may be able to apply a guarantee x &lt; 50. Post-scan filtering would then only need to consider y &gt; 40 (for this specific batch). The next batch may not be able to attach any guarantee and both clauses would need to be applied to that batch.</p>
<p>A single guarantee-aware filtering operation should generally be applied to all resulting batches. The scan node is not responsible for this.</p>
<p>Fields that are referenced by the filter should be included in the <code class="docutils literal notranslate"><span class="pre">columns</span></code> vector. The scan node will not automatically fetch fields referenced by the filter expression.</p>
<p>If the filter references fields that are not included in <code class="docutils literal notranslate"><span class="pre">columns</span></code> this may or may not be an error, depending on the format.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="#structarrow_1_1dataset_1_1_scan_v2_options_1acb1214b2d3c464bdc27912fc994325d0"><span class="std std-ref">AddFieldsNeededForFilter</span></a></p>
</div>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options7columnsE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options7columnsE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options7columnsE"></span><span id="arrow::dataset::ScanV2Options::columns__std::vector:FieldPath:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1ac6ec9aaae71f74eeb5ed9269b9c2b37a"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow9FieldPathE" title="arrow::FieldPath"><span class="n"><span class="pre">FieldPath</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">columns</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options7columnsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The columns to scan. </p>
<p>This is not a simple list of top-level column indices but instead a set of paths allowing for partial selection of columns.</p>
<p>These paths refer to the dataset schema.</p>
<p>For example, consider the following dataset schema: schema({ field(“score”, <a class="reference internal" href="datatype.html#group__type-factories_1ga1713caac102cfa4db4a39e5ce7f642d8"><span class="std std-ref">int32()</span></a>), field(“marker”, struct_({ field(“color”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf9edf8ae2cb03d666b8653f1b4c1385f"><span class="std std-ref">utf8()</span></a>), field(“location”, struct_({ field(“x”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>), field(“y”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>) })) })) })</p>
<p>If <code class="docutils literal notranslate"><span class="pre">columns</span></code> is {{0}, {1,1,0}} then the output schema is: schema({field(“score”, <a class="reference internal" href="datatype.html#group__type-factories_1ga1713caac102cfa4db4a39e5ce7f642d8"><span class="std std-ref">int32()</span></a>), field(“x”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>)})</p>
<p>If <code class="docutils literal notranslate"><span class="pre">columns</span></code> is {{1,1,1}, {1,1}} then the output schema is: schema({ field(“y”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>), field(“location”, struct_({ field(“x”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>), field(“y”, <a class="reference internal" href="datatype.html#group__type-factories_1gaf8281c031294a423f8161bdf200c050f"><span class="std std-ref">float64()</span></a>) })) }) </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options22target_bytes_readaheadE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options22target_bytes_readaheadE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options22target_bytes_readaheadE"></span><span id="arrow::dataset::ScanV2Options::target_bytes_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1a3348308f9e30e1bfaeeb87daad75b8c9"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">target_bytes_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv422kDefaultBytesReadahead" title="kDefaultBytesReadahead"><span class="n"><span class="pre">kDefaultBytesReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options22target_bytes_readaheadE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Target number of bytes to read ahead in a fragment. </p>
<p>This limit involves some amount of estimation. Formats typically only know batch boundaries in terms of rows (not decoded bytes) and so an estimation must be done to guess the average row size. Other formats like CSV and JSON must make even more generalized guesses.</p>
<p>This is a best-effort guide. Some formats may need to read ahead further; for example, if scanning a parquet file that has batches with 100MiB of data then the actual readahead will be at least 100MiB.</p>
<p>Set to 0 to disable readahead. When disabled, the scanner will read the dataset one batch at a time.</p>
<p>This limit applies across all fragments. If the limit is 32MiB and the fragment readahead allows for 20 fragments to be read at once then the total readahead will still be 32MiB and NOT 20 * 32MiB. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options18fragment_readaheadE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options18fragment_readaheadE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options18fragment_readaheadE"></span><span id="arrow::dataset::ScanV2Options::fragment_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1aa5e15f77ccedb5178ed48dcc8598b585"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragment_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv425kDefaultFragmentReadahead" title="kDefaultFragmentReadahead"><span class="n"><span class="pre">kDefaultFragmentReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options18fragment_readaheadE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Number of fragments to read ahead. </p>
<p>Higher readahead will potentially lead to more efficient I/O but will lead to the scan operation using more RAM. The default is fairly conservative and designed for fast local disks (or slow local spinning disks which cannot handle much parallelism anyways). When using a highly parallel remote filesystem you will likely want to increase these values.</p>
<p>Set to 0 to disable fragment readahead. When disabled the dataset will be scanned one fragment at a time. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options14format_optionsE">
<span id="_CPPv3N5arrow7dataset13ScanV2Options14format_optionsE"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options14format_optionsE"></span><span id="arrow::dataset::ScanV2Options::format_options__FragmentScanOptionsCP"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1ace6f6320b2643500d457f775ab72e2c8"></span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="sig-name descname"><span class="n"><span class="pre">format_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options14format_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options specific to the file format. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options10AllColumnsERK6Schema">
<span id="_CPPv3N5arrow7dataset13ScanV2Options10AllColumnsERK6Schema"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options10AllColumnsERK6Schema"></span><span id="arrow::dataset::ScanV2Options::AllColumns__SchemaCR"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1a8427bca0bf19adcd18fe5984b19de95e"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow9FieldPathE" title="arrow::FieldPath"><span class="n"><span class="pre">FieldPath</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">AllColumns</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">dataset_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options10AllColumnsERK6Schema" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Utility method to get a selection representing all columns in a dataset. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13ScanV2Options24AddFieldsNeededForFilterEP13ScanV2Options">
<span id="_CPPv3N5arrow7dataset13ScanV2Options24AddFieldsNeededForFilterEP13ScanV2Options"></span><span id="_CPPv2N5arrow7dataset13ScanV2Options24AddFieldsNeededForFilterEP13ScanV2Options"></span><span id="arrow::dataset::ScanV2Options::AddFieldsNeededForFilter__ScanV2OptionsP"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_v2_options_1acb1214b2d3c464bdc27912fc994325d0"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">AddFieldsNeededForFilter</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13ScanV2OptionsE" title="arrow::dataset::ScanV2Options"><span class="n"><span class="pre">ScanV2Options</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset13ScanV2Options24AddFieldsNeededForFilterEP13ScanV2Options" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Utility method to add fields needed for the current filter. </p>
<p>This method adds any fields that are needed by <code class="docutils literal notranslate"><span class="pre">filter</span></code> which are not already included in the list of columns. Any new fields added will be added to the end in no particular order. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescrE">
<span id="_CPPv3N5arrow7dataset15ProjectionDescrE"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescrE"></span><span id="arrow::dataset::ProjectionDescr"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ProjectionDescr</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescrE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Describes a projection. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr10expressionE">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr10expressionE"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr10expressionE"></span><span id="arrow::dataset::ProjectionDescr::expression__compute::Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1ad10d488061ad3e685ba44b0bba218fc7"></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">expression</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr10expressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The projection expression itself. This expression must be a call to make_struct. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr6schemaE">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr6schemaE"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr6schemaE"></span><span id="arrow::dataset::ProjectionDescr::schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1aa7244f163308279c976c1c78d881ef3c"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr6schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The output schema of the projection. </p>
<p>This can be calculated from the input schema and the expression but it is cached here for convenience. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr20FromStructExpressionERKN7compute10ExpressionERK6Schema">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr20FromStructExpressionERKN7compute10ExpressionERK6Schema"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr20FromStructExpressionERKN7compute10ExpressionERK6Schema"></span><span id="arrow::dataset::ProjectionDescr::FromStructExpression__compute::ExpressionCR.SchemaCR"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1a121ad17de2cc1554ce72094659c9d8fd"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15ProjectionDescrE" title="arrow::dataset::ProjectionDescr"><span class="n"><span class="pre">ProjectionDescr</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FromStructExpression</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">expression</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span 
class="pre">dataset_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr20FromStructExpressionERKN7compute10ExpressionERK6Schema" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#structarrow_1_1dataset_1_1_projection_descr"><span class="std std-ref">ProjectionDescr</span></a> by binding an expression to the dataset schema. </p>
<p>The expression must return a struct type. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr15FromExpressionsENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEERK6Schema">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr15FromExpressionsENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEERK6Schema"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr15FromExpressionsENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEERK6Schema"></span><span id="arrow::dataset::ProjectionDescr::FromExpressions__std::vector:compute::Expression:.std::vector:ss:.SchemaCR"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1a4136f1fcafcc7f41a59bdb8c06b415c8"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15ProjectionDescrE" title="arrow::dataset::ProjectionDescr"><span class="n"><span class="pre">ProjectionDescr</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FromExpressions</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">exprs</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span 
class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">names</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">dataset_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr15FromExpressionsENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEERK6Schema" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#structarrow_1_1dataset_1_1_projection_descr"><span class="std std-ref">ProjectionDescr</span></a> from expressions/names for each field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr9FromNamesENSt6vectorINSt6stringEEERK6Schema">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr9FromNamesENSt6vectorINSt6stringEEERK6Schema"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr9FromNamesENSt6vectorINSt6stringEEERK6Schema"></span><span id="arrow::dataset::ProjectionDescr::FromNames__std::vector:ss:.SchemaCR"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1a7ff9d88b63a48dabd093dfd6de84eb4b"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15ProjectionDescrE" title="arrow::dataset::ProjectionDescr"><span class="n"><span class="pre">ProjectionDescr</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FromNames</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">names</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">dataset_schema</span></span><span class="sig-paren">)</span><a 
class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr9FromNamesENSt6vectorINSt6stringEEERK6Schema" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a default projection referencing fields in the dataset schema. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ProjectionDescr7DefaultERK6Schema">
<span id="_CPPv3N5arrow7dataset15ProjectionDescr7DefaultERK6Schema"></span><span id="_CPPv2N5arrow7dataset15ProjectionDescr7DefaultERK6Schema"></span><span id="arrow::dataset::ProjectionDescr::Default__SchemaCR"></span><span class="target" id="structarrow_1_1dataset_1_1_projection_descr_1a762e65911f1e48f14dc11acb4c71d3f6"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15ProjectionDescrE" title="arrow::dataset::ProjectionDescr"><span class="n"><span class="pre">ProjectionDescr</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Default</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">dataset_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15ProjectionDescr7DefaultERK6Schema" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Make a projection that projects every field in the dataset schema. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17TaggedRecordBatchE">
<span id="_CPPv3N5arrow7dataset17TaggedRecordBatchE"></span><span id="_CPPv2N5arrow7dataset17TaggedRecordBatchE"></span><span id="arrow::dataset::TaggedRecordBatch"></span><span class="target" id="structarrow_1_1dataset_1_1_tagged_record_batch"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatch</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17TaggedRecordBatchE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Combines a record batch with the fragment that the record batch originated from. </p>
<p>Knowing the source fragment can be useful for debugging &amp; understanding loaded data. </p>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21EnumeratedRecordBatchE">
<span id="_CPPv3N5arrow7dataset21EnumeratedRecordBatchE"></span><span id="_CPPv2N5arrow7dataset21EnumeratedRecordBatchE"></span><span id="arrow::dataset::EnumeratedRecordBatch"></span><span class="target" id="structarrow_1_1dataset_1_1_enumerated_record_batch"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatch</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21EnumeratedRecordBatchE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Combines a tagged batch with positional information. </p>
<p>This is returned when scanning batches in an unordered fashion. This information is needed if you ever want to reassemble the batches in order. </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7ScannerE">
<span id="_CPPv3N5arrow7dataset7ScannerE"></span><span id="_CPPv2N5arrow7dataset7ScannerE"></span><span id="arrow::dataset::Scanner"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scanner</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7ScannerE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>A scanner glues together several dataset classes to load in data. </p>
<p>The dataset contains a collection of fragments and partitioning rules.</p>
<p>The fragments identify independently loadable units of data (i.e. each fragment has a potentially unique schema and possibly even format). It should be possible to read fragments in parallel if desired.</p>
<p>The fragment’s format contains the logic necessary to actually create a task to load the fragment into memory. That task may or may not support parallel execution of its own.</p>
<p>The scanner is then responsible for creating scan tasks from every fragment in the dataset and (potentially) sequencing the loaded record batches together.</p>
<p>The scanner should not buffer the entire dataset in memory (unless asked), instead yielding record batches as soon as they are ready to scan. Various readahead properties control how much data is allowed to be scanned before pausing to let a slow consumer catch up.</p>
<p>Today the scanner also handles projection &amp; filtering although that may change in the future. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE">
<span id="_CPPv3N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE"></span><span id="_CPPv2N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a343eb51ca6ada0ff70bf7a868214c1dd"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="p"><span class="pre">(</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17TaggedRecordBatchE" title="arrow::dataset::TaggedRecordBatch"><span class="n"><span class="pre">TaggedRecordBatch</span></span></a><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">visitor</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Apply a visitor to each <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a> as it is scanned. </p>
<p>If multiple threads are used (via use_threads), the visitor will be invoked from those threads and is responsible for any synchronization. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner7ToTableEv">
<span id="_CPPv3N5arrow7dataset7Scanner7ToTableEv"></span><span id="_CPPv2N5arrow7dataset7Scanner7ToTableEv"></span><span id="arrow::dataset::Scanner::ToTable"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a706750f2cd07ba40507a7ba139c78ea5"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ToTable</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner7ToTableEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Convert a <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> into a <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
<p>Use this convenience utility with care. This will serially materialize the Scan result in memory before creating the <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner11ScanBatchesEv">
<span id="_CPPv3N5arrow7dataset7Scanner11ScanBatchesEv"></span><span id="_CPPv2N5arrow7dataset7Scanner11ScanBatchesEv"></span><span id="arrow::dataset::Scanner::ScanBatches"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a8c511a98ed14b65d39d36347a4007612"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv425TaggedRecordBatchIterator" title="TaggedRecordBatchIterator"><span class="n"><span class="pre">TaggedRecordBatchIterator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatches</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner11ScanBatchesEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Scan the dataset into a stream of record batches. </p>
<p>Each batch is tagged with the fragment it originated from. The batches will arrive in order. The order of fragments is determined by the dataset.</p>
<p>Note: The scanner will perform some readahead but will avoid materializing too much in memory (this is governed by the readahead options and the use_threads option). If the readahead queue fills up then I/O will pause until the calling thread catches up. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner20ScanBatchesUnorderedEv">
<span id="_CPPv3N5arrow7dataset7Scanner20ScanBatchesUnorderedEv"></span><span id="_CPPv2N5arrow7dataset7Scanner20ScanBatchesUnorderedEv"></span><span id="arrow::dataset::Scanner::ScanBatchesUnordered"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a9b7e324773eeffc7795cb1e949df85ce"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv429EnumeratedRecordBatchIterator" title="EnumeratedRecordBatchIterator"><span class="n"><span class="pre">EnumeratedRecordBatchIterator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesUnordered</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner20ScanBatchesUnorderedEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Scan the dataset into a stream of record batches. </p>
<p>Unlike ScanBatches this method may allow record batches to be returned out of order. This allows for more efficient scanning: some fragments may be accessed more quickly than others (e.g. may be cached in RAM or just happen to get scheduled earlier by the I/O).</p>
<p>To make up for the out-of-order iteration each batch is further tagged with positional information. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner8TakeRowsERK5Array">
<span id="_CPPv3N5arrow7dataset7Scanner8TakeRowsERK5Array"></span><span id="_CPPv2N5arrow7dataset7Scanner8TakeRowsERK5Array"></span><span id="arrow::dataset::Scanner::TakeRows__ArrayCR"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1abf4b346afc570b1023ecd75627ecfaae"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TakeRows</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="array.html#_CPPv4N5arrow5ArrayE" title="arrow::Array"><span class="n"><span class="pre">Array</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">indices</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner8TakeRowsERK5Array" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A convenience to synchronously load the given rows by index. </p>
<p>Will only consume as many batches as needed from <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner_1a8c511a98ed14b65d39d36347a4007612"><span class="std std-ref">ScanBatches()</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner4HeadE7int64_t">
<span id="_CPPv3N5arrow7dataset7Scanner4HeadE7int64_t"></span><span id="_CPPv2N5arrow7dataset7Scanner4HeadE7int64_t"></span><span id="arrow::dataset::Scanner::Head__int64_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a10d6cabf75ac43c32fff472c97673daf"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Head</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">num_rows</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner4HeadE7int64_t" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the first N rows. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner9CountRowsEv">
<span id="_CPPv3N5arrow7dataset7Scanner9CountRowsEv"></span><span id="_CPPv2N5arrow7dataset7Scanner9CountRowsEv"></span><span id="arrow::dataset::Scanner::CountRows"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a0548ff4c34bf8763bc2067c37459fad2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">int64_t</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CountRows</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner9CountRowsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Count rows matching a predicate. </p>
<p>This method will push down the predicate and compute the result based on fragment metadata if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner19ToRecordBatchReaderEv">
<span id="_CPPv3N5arrow7dataset7Scanner19ToRecordBatchReaderEv"></span><span id="_CPPv2N5arrow7dataset7Scanner19ToRecordBatchReaderEv"></span><span id="arrow::dataset::Scanner::ToRecordBatchReader"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a4c39dc64f6939bc205ee9a7ca9fac8a7"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><span class="n"><span class="pre">RecordBatchReader</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ToRecordBatchReader</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner19ToRecordBatchReaderEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Convert the <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> to a <a class="reference internal" href="table.html#classarrow_1_1_record_batch_reader"><span class="std std-ref">RecordBatchReader</span></a> so it can be easily used with APIs that expect a reader. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Scanner7optionsEv">
<span id="_CPPv3NK5arrow7dataset7Scanner7optionsEv"></span><span id="_CPPv2NK5arrow7dataset7Scanner7optionsEv"></span><span id="arrow::dataset::Scanner::optionsC"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1adddf7d3dea394e017624ad7ea6c27b71"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">options</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Scanner7optionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the options for this scan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Scanner7datasetEv">
<span id="_CPPv3NK5arrow7dataset7Scanner7datasetEv"></span><span id="_CPPv2NK5arrow7dataset7Scanner7datasetEv"></span><span id="arrow::dataset::Scanner::datasetC"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a93e2babbd9e898ec56304c56beb99b0f"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">dataset</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Scanner7datasetEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the dataset that this scanner will scan. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilderE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilderE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilderE"></span><span id="arrow::dataset::ScannerBuilder"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScannerBuilder</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilderE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner_builder"><span class="std std-ref">ScannerBuilder</span></a> is a factory class to construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a>. </p>
<p>It is used to pass information, notably a potential filter expression and a subset of columns to materialize. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::ScannerBuilder::Project__std::vector:ss:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a15da200cbacbf327be99a3f6952ab21a"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Project</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">columns</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the subset of columns to materialize. </p>
<p>Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>columns</strong><strong>[in]</strong> list of columns to project. Order and duplicates will be preserved.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Failure if any column name does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::ScannerBuilder::Project__std::vector:compute::Expression:.std::vector:ss:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a5c4bf57a2fb9ca8d94619d81ef1bac78"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Project</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">exprs</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">names</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorIN7compute10ExpressionEEENSt6vectorINSt6stringEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set expressions which will be evaluated to produce the materialized columns. </p>
<p>Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong><strong>[in]</strong> expressions to evaluate to produce columns. </p></li>
<li><p><strong>names</strong><strong>[in]</strong> list of names for the resulting columns.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Failure if any referenced column does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE"></span><span id="arrow::dataset::ScannerBuilder::Filter__compute::ExpressionCR"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a1b1592251dd5f4b0e68316f2fedc5eac"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Filter</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">filter</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the filter expression to return only rows matching the filter. </p>
<p>The predicate will be passed down to Sources and corresponding Fragments to exploit predicate pushdown if possible using partition information or <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> internal metadata, e.g. Parquet statistics. Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>filter</strong><strong>[in]</strong> expression to filter rows with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Failure if any referenced column does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder10UseThreadsEb">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder10UseThreadsEb"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder10UseThreadsEb"></span><span id="arrow::dataset::ScannerBuilder::UseThreads__b"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1ab10faa196ec9a3a74d47681f1283097e"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">UseThreads</span></span></span><span class="sig-paren">(</span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">use_threads</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">true</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder10UseThreadsEb" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> should make use of the available ThreadPool found in <a class="reference internal" href="#structarrow_1_1dataset_1_1_scan_options"><span class="std std-ref">ScanOptions</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t"></span><span id="arrow::dataset::ScannerBuilder::BatchSize__int64_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1adc02c1cc2b9be99cf4d5e2f91cd23458"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BatchSize</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">batch_size</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the maximum number of rows per <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a>. </p>
<p>
This option provides a control limiting the memory owned by any <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a>. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>batch_size</strong><strong>[in]</strong> the maximum number of rows. </p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>An error if the batch size is not greater than 0.</p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder14BatchReadaheadE7int32_t">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder14BatchReadaheadE7int32_t"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder14BatchReadaheadE7int32_t"></span><span id="arrow::dataset::ScannerBuilder::BatchReadahead__int32_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1aef713a3ea70facd71972d88ea2eae0de"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BatchReadahead</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">batch_readahead</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder14BatchReadaheadE7int32_t" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the number of batches to read ahead within a fragment. </p>
<p>
This option provides a control on the RAM vs I/O tradeoff. It might not be supported by all file formats, in which case it will simply be ignored. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>batch_readahead</strong><strong>[in]</strong> How many batches to read ahead within a fragment </p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an error if this number is less than 0.</p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder17FragmentReadaheadE7int32_t">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder17FragmentReadaheadE7int32_t"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder17FragmentReadaheadE7int32_t"></span><span id="arrow::dataset::ScannerBuilder::FragmentReadahead__int32_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a13b01e654d09bc7bc74abab845dc72e5"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FragmentReadahead</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">fragment_readahead</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder17FragmentReadaheadE7int32_t" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the number of fragments to read ahead. </p>
<p>
This option provides a control on the RAM vs I/O tradeoff. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>fragment_readahead</strong><strong>[in]</strong> How many fragments to read ahead </p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>an error if this number is less than 0.</p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool"></span><span id="arrow::dataset::ScannerBuilder::Pool__MemoryPoolP"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1acc144563c285acf7b0397d9878214d9d"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Pool</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="memory.html#_CPPv4N5arrow10MemoryPoolE" title="arrow::MemoryPool"><span class="n"><span class="pre">MemoryPool</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">pool</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set the pool from which materialized and scanned arrays will be allocated. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE"></span><span id="arrow::dataset::ScannerBuilder::FragmentScanOptions__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a93f1115f1ddb452e304a23e7ed68cb4c"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FragmentScanOptions</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="arrow::dataset::ScannerBuilder::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">fragment_scan_options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Set fragment-specific scan options. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder12BackpressureEN5acero19BackpressureOptionsE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder12BackpressureEN5acero19BackpressureOptionsE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder12BackpressureEN5acero19BackpressureOptionsE"></span><span id="arrow::dataset::ScannerBuilder::Backpressure__acero::BackpressureOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a3c786410c94883c5baee280572606ee0"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Backpressure</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero19BackpressureOptionsE" title="arrow::acero::BackpressureOptions"><span class="n"><span class="pre">BackpressureOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">backpressure</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder12BackpressureEN5acero19BackpressureOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Override default backpressure configuration. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder14GetScanOptionsEv">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder14GetScanOptionsEv"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder14GetScanOptionsEv"></span><span id="arrow::dataset::ScannerBuilder::GetScanOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1add021b12ea2fbf4c55b35fe47777fc69"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetScanOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder14GetScanOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the current scan options for the builder. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder6FinishEv">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder6FinishEv"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder6FinishEv"></span><span id="arrow::dataset::ScannerBuilder::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a5fcc65be277f4e76ef89e16b9799168e"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><span class="n"><span class="pre">Scanner</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder6FinishEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the constructed now-immutable <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> object. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder21FromRecordBatchReaderENSt10shared_ptrI17RecordBatchReaderEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder21FromRecordBatchReaderENSt10shared_ptrI17RecordBatchReaderEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder21FromRecordBatchReaderENSt10shared_ptrI17RecordBatchReaderEE"></span><span id="arrow::dataset::ScannerBuilder::FromRecordBatchReader__std::shared_ptr:RecordBatchReader:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a577cd3040147e76acbcd886d9a59ea0c"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14ScannerBuilderE" title="arrow::dataset::ScannerBuilder"><span class="n"><span class="pre">ScannerBuilder</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FromRecordBatchReader</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><span class="n"><span class="pre">RecordBatchReader</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">reader</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder21FromRecordBatchReaderENSt10shared_ptrI17RecordBatchReaderEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Make a scanner from a record batch reader. </p>
<p>The resulting scanner can be scanned only once. This is intended to support writing data from streaming sources or other sources that can be iterated only once. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15ScanNodeOptionsE">
<span id="_CPPv3N5arrow7dataset15ScanNodeOptionsE"></span><span id="_CPPv2N5arrow7dataset15ScanNodeOptionsE"></span><span id="arrow::dataset::ScanNodeOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_scan_node_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanNodeOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero15ExecNodeOptionsE" title="arrow::acero::ExecNodeOptions"><span class="n"><span class="pre">ExecNodeOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset15ScanNodeOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/scanner.h&gt;</em></div>
<p>Construct a source ExecNode which yields batches from a dataset scan. </p>
<p>Does not construct associated filter or project nodes. Yielded batches will be augmented with fragment/batch indices to enable stable ordering for simple ExecPlans. </p>
</dd></dl>
</section>
<section id="concrete-implementations">
<h2>Concrete implementations<a class="headerlink" href="#concrete-implementations" title="Permalink to this heading">#</a></h2>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragmentE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragmentE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragmentE"></span><span id="arrow::dataset::InMemoryFragment"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryFragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragmentE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/dataset.h&gt;</em></div>
<p>A trivial <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> that yields ScanTask out of a fixed set of <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::InMemoryFragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1a8ed0cd26e0427cac2447a309d5ae3f5d"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::InMemoryFragment::CountRows__compute::Expression.std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1ac17404681286957ee1bd3bc7f1735907"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">optional</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">int64_t</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CountRows</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" 
href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Count the number of rows in this fragment matching the filter using metadata only. </p>
<p>That is, this method may perform I/O, but will not load data.</p>
<p>If this is not possible, resolve with an empty optional. The fragment can perform I/O (e.g. to read metadata) before deciding whether it can satisfy the request. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::InMemoryFragment::InspectFragment__FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1a45e842ec5ed47203f1dcb6c2ba0286a8"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span 
class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Inspect a fragment to learn basic information. </p>
<p>This will be called before a scan, and a fragment should attach whatever information will be needed to figure out an evolution strategy. This information will then be passed to the call to BeginScan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::InMemoryFragment::BeginScan__FragmentScanRequestCR.InspectedFragmentCR.FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1a8ba4d0afbcce6afb0619d49825253a54"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentScanner</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BeginScan</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">FragmentScanRequest</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">request</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">InspectedFragment</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">inspected_fragment</span></span>, <span 
class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Start a scan operation. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDatasetE">
<span id="_CPPv3N5arrow7dataset15InMemoryDatasetE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDatasetE"></span><span id="arrow::dataset::InMemoryDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDatasetE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/dataset.h&gt;</em></div>
<p>A Source which yields fragments wrapping a stream of record batches. </p>
<p>The record batches must match the schema provided to the source at construction. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Schema:.std::shared_ptr:RecordBatchGenerator:"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ac0db38bfc1de7b4d13240efa6f47662c"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE" title="arrow::dataset::InMemoryDataset::RecordBatchGenerator"><span class="n"><span class="pre">RecordBatchGenerator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">get_batches</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Construct a dataset from a schema and a factory of record batch iterators. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Schema:.RecordBatchVector"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1a479b929b6e00e4722052a6e7b0a231fe"></span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">RecordBatchVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">batches</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Convenience constructor taking a fixed list of batches. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Table:"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ab589f1c3da256de2ab3cde77d9b86965"></span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">table</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Convenience constructor taking a <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset15InMemoryDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset15InMemoryDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset15InMemoryDataset9type_nameEv"></span><span id="arrow::dataset::InMemoryDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ae39b5d039ae08ed1c6e17bfe1641f380"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset15InMemoryDataset9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::InMemoryDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1a28d46d3d11d41e4d12f7157a57b6eca4"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
</div>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE"></span><span id="arrow::dataset::InMemoryDataset::RecordBatchGenerator"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1_1_record_batch_generator"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">RecordBatchGenerator</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/dataset.h&gt;</em></div>
</dd></dl>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12UnionDatasetE">
<span id="_CPPv3N5arrow7dataset12UnionDatasetE"></span><span id="_CPPv2N5arrow7dataset12UnionDatasetE"></span><span id="arrow::dataset::UnionDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">UnionDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset12UnionDatasetE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/dataset.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> wrapping child Datasets. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12UnionDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset12UnionDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset12UnionDataset9type_nameEv"></span><span id="arrow::dataset::UnionDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a87c9d5cb26b147127ce28259298ce52a"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12UnionDataset9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::UnionDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a12415d746f8c71d5af09e53818779aae"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector">
<span id="_CPPv3N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector"></span><span id="_CPPv2N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector"></span><span id="arrow::dataset::UnionDataset::Make__std::shared_ptr:Schema:.DatasetVector"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a9005125d1f96e53f41bad18a9e28b5b4"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12UnionDatasetE" title="arrow::dataset::UnionDataset"><span class="n"><span class="pre">UnionDataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">DatasetVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">children</span></span><span 
class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">UnionDataset</span></a> wrapping child Datasets. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong><strong>[in]</strong> the schema of the resulting dataset. </p></li>
<li><p><strong>children</strong><strong>[in]</strong> one or more child Datasets. Their schemas must be identical to schema. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactoryE">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactoryE"></span><span id="arrow::dataset::UnionDatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">UnionDatasetFactory</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset19UnionDatasetFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a> provides a way to inspect/discover a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s expected schema before materialization. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19UnionDatasetFactory9factoriesEv">
<span id="_CPPv3NK5arrow7dataset19UnionDatasetFactory9factoriesEv"></span><span id="_CPPv2NK5arrow7dataset19UnionDatasetFactory9factoriesEv"></span><span id="arrow::dataset::UnionDatasetFactory::factoriesC"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ab8c9b0708cbd6c7c5780d4f05b577b52"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">factories</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19UnionDatasetFactory9factoriesEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the list of child <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a> instances. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::UnionDatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ad749b1d98f15204a9680b4ba06e4575b"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the schemas of the Datasets. </p>
<p>Instead of applying options globally, it applies at each child factory. This will not respect <code class="docutils literal notranslate"><span class="pre">options.fragments</span></code> exactly, but will respect the spirit of peeking the first fragments or all of them. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::UnionDatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ae3c038ae756ed8e63a61a0138d8dc0a5"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<section id="file-system-datasets">
<h3>File System Datasets<a class="headerlink" href="#file-system-datasets" title="Permalink to this heading">#</a></h3>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptionsE"></span><span id="arrow::dataset::FileSystemFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions12partitioningE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions12partitioningE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions12partitioningE"></span><span id="arrow::dataset::FileSystemFactoryOptions::partitioning__PartitioningOrFactory"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1a69af22fb504696928af8963c04af38bb"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset21PartitioningOrFactoryE" title="arrow::dataset::PartitioningOrFactory"><span class="n"><span class="pre">PartitioningOrFactory</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12Partitioning7DefaultEv" title="arrow::dataset::Partitioning::Default"><span class="n"><span class="pre">Default</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions12partitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Either an explicit <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> or a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a> to discover one. </p>
<p>If a factory is provided, it will be used to infer a schema for partition fields based on file and directory paths then construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. The default is a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> which will yield no partition information.</p>
<p>The (explicit or discovered) partitioning will be applied to discovered files and the resulting partition information embedded in the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE"></span><span id="arrow::dataset::FileSystemFactoryOptions::partition_base_dir__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1a5bfddc4e322fab63c777587eecfcf71f"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partition_base_dir</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>For the purposes of applying the partitioning, paths will be stripped of the partition_base_dir. </p>
<p>Files not matching the partition_base_dir prefix will be skipped for partition discovery. The ignored files will still be part of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>, but will not have partition information.</p>
<p>Example: partition_base_dir = “/dataset”;</p>
<ul class="simple">
<li><p>“/dataset/US/sales.csv” -&gt; “US/sales.csv” will be given to the partitioning</p></li>
<li><p>“/home/john/late_sales.csv” -&gt; Will be ignored for partition discovery.</p></li>
</ul>
<p>This is useful for partitionings that parse directory paths where ordering is important, e.g. <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE"></span><span id="arrow::dataset::FileSystemFactoryOptions::exclude_invalid_files__b"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1afe6dcef4df15e0f633068fde243e58c9"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">exclude_invalid_files</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Invalid files (via selector or explicitly) will be excluded by checking with the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format_1ae934a7b69d0402f36aecfa6037fa1343"><span class="std std-ref">FileFormat::IsSupported</span></a> method. </p>
<p>This will incur IO for each file in a serial and single-threaded fashion. Disabling this feature will skip the IO, but unsupported files may be present in the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> (resulting in an error at scan time). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE"></span><span id="arrow::dataset::FileSystemFactoryOptions::selector_ignore_prefixes__std::vector:ss:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1af00c6508a0956e77d535c7cd764b33a4"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">selector_ignore_prefixes</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="s"><span class="pre">&quot;.&quot;</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;_&quot;</span></span><span class="p"><span class="pre">,</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>When discovering from a Selector (and not from an explicit file list), ignore files and directories matching any of these prefixes. </p>
<p>Example (with selector = “/dataset/**”): selector_ignore_prefixes = {“_”, “.DS_STORE” };</p>
<ul class="simple">
<li><p>“/dataset/data.csv” -&gt; not ignored</p></li>
<li><p>“/dataset/_metadata” -&gt; ignored</p></li>
<li><p>“/dataset/.DS_STORE” -&gt; ignored</p></li>
<li><p>“/dataset/_hidden/dat” -&gt; ignored</p></li>
<li><p>“/dataset/nested/.DS_STORE” -&gt; ignored </p></li>
</ul>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactoryE"></span><span id="arrow::dataset::FileSystemDatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDatasetFactory</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/discovery.h&gt;</em></div>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> creates a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> from a vector of <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_info"><span class="std std-ref">fs::FileInfo</span></a> or a <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_selector"><span class="std std-ref">fs::FileSelector</span></a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a89747f36564fd22e1110048aa3607535"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the schemas of the Fragments and <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1ac2f094573a36e0b76601a5e6c0517c9a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given options. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__std::shared_ptr:fs::FileSystem:.std::vector:ss:CR.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a2be4ad879d3e94308e8ae08b7d148044"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" 
title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from an explicit list of paths. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filesystem</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>paths</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>format</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__std::shared_ptr:fs::FileSystem:.fs::FileSelector.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a86b89c3ebe5cdab3cf7f7dbd9a633359"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" 
title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs12FileSelectorE" title="arrow::fs::FileSelector"><span class="n"><span class="pre">FileSelector</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">selector</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from a <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_selector"><span class="std std-ref">fs::FileSelector</span></a>. </p>
<p>The selector will expand to a vector of FileInfo. The expansion/crawling is performed in this function call. Thus, the finalized <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> is working with a snapshot of the filesystem. If options.partition_base_dir is not provided, it will be overwritten with selector.base_dir.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filesystem</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>selector</strong><strong>[in]</strong> used to crawl and search files </p></li>
<li><p><strong>format</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__ss.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1ab3e9380381cab2b628c4b23ae8690983"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">uri</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" 
title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from a URI that includes filesystem information. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>uri</strong> &#8211; <strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>format</strong> &#8211; <strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>options</strong> &#8211; <strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorIN2fs8FileInfoEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorIN2fs8FileInfoEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorIN2fs8FileInfoEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__std::shared_ptr:fs::FileSystem:.std::vector:fs::FileInfo:CR.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a68514387cd1cca3f77e0064446f27fec"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" 
href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs8FileInfoE" title="arrow::fs::FileInfo"><span class="n"><span class="pre">FileInfo</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">files</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorIN2fs8FileInfoEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from an explicit list of file information. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filesystem</strong> &#8211; <strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>files</strong> &#8211; <strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>format</strong> &#8211; <strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a></p></li>
<li><p><strong>options</strong> &#8211; <strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileSourceE">
<span id="_CPPv3N5arrow7dataset10FileSourceE"></span><span id="_CPPv2N5arrow7dataset10FileSourceE"></span><span id="arrow::dataset::FileSource"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileSource</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">util</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">EqualityComparable</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileSourceE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>The path and filesystem where an actual file is located or a buffer which can be read like a file. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource11compressionEv">
<span id="_CPPv3NK5arrow7dataset10FileSource11compressionEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource11compressionEv"></span><span id="arrow::dataset::FileSource::compressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a083a2d7e4b982b4f2df40faa1c252fab"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="n"><span class="pre">Compression</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="utilities.html#_CPPv4N5arrow11Compression4typeE" title="arrow::Compression::type"><span class="n"><span class="pre">type</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">compression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource11compressionEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the type of raw compression on the file, if any. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource4pathEv">
<span id="_CPPv3NK5arrow7dataset10FileSource4pathEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource4pathEv"></span><span id="arrow::dataset::FileSource::pathC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1ac357952408c00a9c4dcc347a237c759d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">path</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource4pathEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the file path, if any. Only valid when the file source wraps a path. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource10filesystemEv">
<span id="_CPPv3NK5arrow7dataset10FileSource10filesystemEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource10filesystemEv"></span><span id="arrow::dataset::FileSource::filesystemC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a364996bb9fcb7ba51b852c4d82769a70"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource10filesystemEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the filesystem, if any. Otherwise returns nullptr. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource6bufferEv">
<span id="_CPPv3NK5arrow7dataset10FileSource6bufferEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource6bufferEv"></span><span id="arrow::dataset::FileSource::bufferC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a5d289e2aced706dc4f923cf4af801fdd"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="memory.html#_CPPv4N5arrow6BufferE" title="arrow::Buffer"><span class="n"><span class="pre">Buffer</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">buffer</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource6bufferEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the buffer containing the file, if any. Otherwise returns nullptr. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource4OpenEv">
<span id="_CPPv3NK5arrow7dataset10FileSource4OpenEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource4OpenEv"></span><span id="arrow::dataset::FileSource::OpenC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a08a746f43fce78b9c9a1e12aa4367ee7"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io16RandomAccessFileE" title="arrow::io::RandomAccessFile"><span class="n"><span class="pre">RandomAccessFile</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Open</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource4OpenEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get a RandomAccessFile which views this file source. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource4SizeEv">
<span id="_CPPv3NK5arrow7dataset10FileSource4SizeEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource4SizeEv"></span><span id="arrow::dataset::FileSource::SizeC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a87f94bd386e77e4f740ddba91c159726"></span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Size</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource4SizeEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the size (in bytes) of the file or buffer. If the file is compressed, this should be the compressed (on-disk) size. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource14OpenCompressedENSt8optionalIN11Compression4typeEEE">
<span id="_CPPv3NK5arrow7dataset10FileSource14OpenCompressedENSt8optionalIN11Compression4typeEEE"></span><span id="_CPPv2NK5arrow7dataset10FileSource14OpenCompressedENSt8optionalIN11Compression4typeEEE"></span><span id="arrow::dataset::FileSource::OpenCompressed__std::optional:Compression::type:C"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1aa571d61871068f562cd23d5e96ed9f31"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io11InputStreamE" title="arrow::io::InputStream"><span class="n"><span class="pre">InputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">OpenCompressed</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">optional</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Compression</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="utilities.html#_CPPv4N5arrow11Compression4typeE" title="arrow::Compression::type"><span class="n"><span class="pre">type</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span 
class="pre">compression</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">nullopt</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource14OpenCompressedENSt8optionalIN11Compression4typeEEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get an InputStream which views this file source (and decompresses if needed). </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>compression</strong> &#8211; <strong>[in]</strong> If nullopt, guess the compression scheme from the filename; otherwise decompress with the given codec. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource6EqualsERK10FileSource">
<span id="_CPPv3NK5arrow7dataset10FileSource6EqualsERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset10FileSource6EqualsERK10FileSource"></span><span id="arrow::dataset::FileSource::Equals__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a0a1eebf977d91486778ebedb74302803"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Equals</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">other</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource6EqualsERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Equality comparison with another <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormatE">
<span id="_CPPv3N5arrow7dataset10FileFormatE"></span><span id="_CPPv2N5arrow7dataset10FileFormatE"></span><span id="arrow::dataset::FileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>Base class for file format implementation. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_csv_file_format"><span class="std std-ref">arrow::dataset::CsvFileFormat</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_ipc_file_format"><span class="std std-ref">arrow::dataset::IpcFileFormat</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_json_file_format"><span class="std std-ref">arrow::dataset::JsonFileFormat</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_orc_file_format"><span class="std std-ref">arrow::dataset::OrcFileFormat</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_format"><span class="std std-ref">arrow::dataset::ParquetFileFormat</span></a>, <a class="reference internal" href="#classskyhook_1_1_skyhook_file_format"><span class="std std-ref">skyhook::SkyhookFileFormat</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset10FileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset10FileFormat9type_nameEv"></span><span id="arrow::dataset::FileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1ab089438dfec369581face93a00fdeb08"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::FileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1ae934a7b69d0402f36aecfa6037fa1343"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset10FileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset10FileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::FileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a9c4a4144a9dd512a9bb1a165bb3961c9"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat7InspectERK10FileSource" title="Permalink to this 
definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3NK5arrow7dataset10FileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2NK5arrow7dataset10FileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::FileFormat::InspectFragment__FileSourceCR.FragmentScanOptionsCP.compute::ExecContextPC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a4857699ab4c4ae94fabc833980ea3856"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> 
</span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Learn what we need about the file before we start scanning it. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.compute::Expression.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1aa4fe4dcef6f021d90b3ed929c6d2a06f"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n 
sig-param"><span class="pre">partition_expression</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">physical_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Open a fragment. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a3c0befdc51c1c2d9a561107ecd02007c"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">partition_expression</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a9e605ca731c0c18f2d34291aedeadb30"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span 
class="pre">physical_schema</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::FileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a54f9260d43d2fe5536752575ccf51ae3"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span 
class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" title="Permalink to this 
definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset10FileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset10FileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::FileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1aec4ca5f3edb4c2d184dad6a5ef05ad20"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return null shared_ptr if this file format does not yet support writing datasets. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat29default_fragment_scan_optionsE">
<span id="_CPPv3N5arrow7dataset10FileFormat29default_fragment_scan_optionsE"></span><span id="_CPPv2N5arrow7dataset10FileFormat29default_fragment_scan_optionsE"></span><span id="arrow::dataset::FileFormat::default_fragment_scan_options__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a0fca7a90a8f73961d34f8c62e9ebedcf"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">default_fragment_scan_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat29default_fragment_scan_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options affecting how this format is scanned. </p>
<p>The options here can be overridden at scan time. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragmentE">
<span id="_CPPv3N5arrow7dataset12FileFragmentE"></span><span id="_CPPv2N5arrow7dataset12FileFragmentE"></span><span id="arrow::dataset::FileFragment"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileFragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">util</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">EqualityComparable</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> that is stored in a file with a known format. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_fragment"><span class="std std-ref">arrow::dataset::ParquetFileFragment</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::FileFragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1a521083e64f25370d66a39b87d8a28839"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset12FileFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset12FileFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::FileFragment::CountRows__compute::Expression.std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1a7a181b780129edfc5dff671c42e50868"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">optional</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">int64_t</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CountRows</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" 
href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment9CountRowsEN7compute10ExpressionERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Count the number of rows in this fragment matching the filter using metadata only. </p>
<p>That is, this method may perform I/O, but will not load data.</p>
<p>If this is not possible, resolve with an empty optional. The fragment can perform I/O (e.g. to read metadata) before deciding whether it can satisfy the request. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset12FileFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset12FileFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::FileFragment::BeginScan__FragmentScanRequestCR.InspectedFragmentCR.FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1abd3887e89f7ee5f228136fa93b9e6842"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentScanner</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BeginScan</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">FragmentScanRequest</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">request</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">InspectedFragment</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">inspected_fragment</span></span>, <span class="k"><span 
class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment9BeginScanERK19FragmentScanRequestRK17InspectedFragmentPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Start a scan operation. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3N5arrow7dataset12FileFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2N5arrow7dataset12FileFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::FileFragment::InspectFragment__FragmentScanOptionsCP.compute::ExecContextP"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1a50423efc3efbfa1d5c4a5fb0835e6cb2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span 
class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment15InspectFragmentEPK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Inspect a fragment to learn basic information. </p>
<p>This will be called before a scan, and a fragment should attach whatever information will be needed to figure out an evolution strategy. This information will then be passed to the call to BeginScan. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDatasetE">
<span id="_CPPv3N5arrow7dataset17FileSystemDatasetE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDatasetE"></span><span id="arrow::dataset::FileSystemDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDatasetE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> of FileFragments. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> is composed of one or more <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> instances. The fragments are independent and don’t need to share the same format and/or filesystem. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset9type_nameEv"></span><span id="arrow::dataset::FileSystemDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a9efe37fcf02fce6fe246c9a4a8163190"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the type name of the dataset. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileSystemDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a652887883af7444deee7c87f8e4b59a2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Replace the schema of the dataset. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset5filesEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset5filesEv"></span><span id="arrow::dataset::FileSystemDataset::filesC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1acb41216cc0c88906fec2a169093875f4"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">files</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the paths of the files. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset6formatEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset6formatEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset6formatEv"></span><span id="arrow::dataset::FileSystemDataset::formatC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a271030810dabf0c541d27eeea16abdd2"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">format</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset6formatEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset10filesystemEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset10filesystemEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset10filesystemEv"></span><span id="arrow::dataset::FileSystemDataset::filesystemC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a5e059df7544b7f3c6e7ae0ea11407c39"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset10filesystemEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the filesystem. May be nullptr if the fragments wrap buffers. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset12partitioningEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset12partitioningEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset12partitioningEv"></span><span id="arrow::dataset::FileSystemDataset::partitioningC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1af7358332f36d84ba11e9f179c01a0346"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset12partitioningEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the partitioning. </p>
<p>May be nullptr if the dataset was not constructed with a partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEEN7compute10ExpressionENSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEENSt10shared_ptrI12PartitioningEE">
<span id="_CPPv3N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEEN7compute10ExpressionENSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEENSt10shared_ptrI12PartitioningEE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEEN7compute10ExpressionENSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEENSt10shared_ptrI12PartitioningEE"></span><span id="arrow::dataset::FileSystemDataset::Make__std::shared_ptr:Schema:.compute::Expression.std::shared_ptr:FileFormat:.std::shared_ptr:fs::FileSystem:.std::vector:std::shared_ptr:FileFragment::.std::shared_ptr:Partitioning:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a00ca95dabd9547c1f3dd8677670f9d04"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17FileSystemDatasetE" title="arrow::dataset::FileSystemDataset"><span class="n"><span class="pre">FileSystemDataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span 
class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">root_partition</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span 
class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">fragments</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">partitioning</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEEN7compute10ExpressionENSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEENSt10shared_ptrI12PartitioningEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a>. </p>
<p>
Note that fragments wrapping files resident in differing filesystems are not permitted; to work with multiple filesystems use a <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">UnionDataset</span></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> <strong>[in]</strong> the schema of the dataset </p></li>
<li><p><strong>root_partition</strong> <strong>[in]</strong> the partition expression of the dataset </p></li>
<li><p><strong>format</strong> <strong>[in]</strong> the format of each <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a>. </p></li>
<li><p><strong>filesystem</strong> <strong>[in]</strong> the filesystem of each <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a>, or nullptr if the fragments wrap buffers. </p></li>
<li><p><strong>fragments</strong> <strong>[in]</strong> list of fragments to create the dataset from. </p></li>
<li><p><strong>partitioning</strong> <strong>[in]</strong> the <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> object in case the dataset is created with a known partitioning (e.g. from a discovered partitioning through a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a>), or nullptr if not known.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A constructed dataset. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE">
<span id="_CPPv3N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE"></span><span id="arrow::dataset::FileSystemDataset::Write__FileSystemDatasetWriteOptionsCR.std::shared_ptr:Scanner:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1acf725306aea3c8b46c34a2c160147e49"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><span class="n"><span class="pre">FileSystemDatasetWriteOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">write_options</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><span class="n"><span class="pre">Scanner</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">scanner</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE" 
title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write a dataset. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16FileWriteOptionsE">
<span id="_CPPv3N5arrow7dataset16FileWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset16FileWriteOptionsE"></span><span id="arrow::dataset::FileWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_write_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileWriteOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>Options for writing a file of this format. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_csv_file_write_options"><span class="std std-ref">arrow::dataset::CsvFileWriteOptions</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_ipc_file_write_options"><span class="std std-ref">arrow::dataset::IpcFileWriteOptions</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_write_options"><span class="std std-ref">arrow::dataset::ParquetFileWriteOptions</span></a></p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriterE">
<span id="_CPPv3N5arrow7dataset10FileWriterE"></span><span id="_CPPv2N5arrow7dataset10FileWriterE"></span><span id="arrow::dataset::FileWriter"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileWriter</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriterE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>A writer for this format. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_csv_file_writer"><span class="std std-ref">arrow::dataset::CsvFileWriter</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_ipc_file_writer"><span class="std std-ref">arrow::dataset::IpcFileWriter</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_writer"><span class="std std-ref">arrow::dataset::ParquetFileWriter</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE">
<span id="_CPPv3N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="_CPPv2N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="arrow::dataset::FileWriter::Write__std::shared_ptr:RecordBatch:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1ab2d336af7fa7b0e7233f4faf7faaa7a6"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><span class="n"><span class="pre">RecordBatch</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">batch</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write the given batch. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader">
<span id="_CPPv3N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader"></span><span id="_CPPv2N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader"></span><span id="arrow::dataset::FileWriter::Write__RecordBatchReaderP"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1a7be5615da56f7ee21b02805c869a7b91"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><span class="n"><span class="pre">RecordBatchReader</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">batches</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write all batches from the reader. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter6FinishEv">
<span id="_CPPv3N5arrow7dataset10FileWriter6FinishEv"></span><span id="_CPPv2N5arrow7dataset10FileWriter6FinishEv"></span><span id="arrow::dataset::FileWriter::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1a1e5400135956180accdaf369053b56a8"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter6FinishEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate that writing is done. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileWriter15GetBytesWrittenEv">
<span id="_CPPv3NK5arrow7dataset10FileWriter15GetBytesWrittenEv"></span><span id="_CPPv2NK5arrow7dataset10FileWriter15GetBytesWrittenEv"></span><span id="arrow::dataset::FileWriter::GetBytesWrittenC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1a169eacf6e18a03925ed53512dbcb434c"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">int64_t</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetBytesWritten</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileWriter15GetBytesWrittenEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>After <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_writer_1a1e5400135956180accdaf369053b56a8"><span class="std std-ref">Finish()</span></a> is called, provides the number of bytes written to the file. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDatasetWriteOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>Options for writing a dataset. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::file_write_options__std::shared_ptr:FileWriteOptions:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a03a653d68b2c626088f7fe4c07a21493"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">file_write_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options for individual fragment writing. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::filesystem__std::shared_ptr:fs::FileSystem:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a15f1d8b08b576ae26b8f0c6e0bf78f61"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>FileSystem into which a dataset will be written. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::base_dir__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a9111e2576afd0b716de9e5db388e12ce"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">base_dir</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Root directory into which the dataset will be written. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::partitioning__std::shared_ptr:Partitioning:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a779361c86adf6237e58cb65935fc9bad"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> used to generate fragment paths. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::max_partitions__i"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a2bca12ddd6e2c3252253cb0feb232f80"></span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">max_partitions</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1024</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Maximum number of partitions any batch may be written into, default is 1K. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::basename_template__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a9cf0bbd89148c7ea5481c8b0f5c674fd"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">basename_template</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Template string used to generate fragment basenames. </p>
<p>{i} will be replaced by an auto incremented integer. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions25basename_template_functorE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions25basename_template_functorE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions25basename_template_functorE"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1ac8a0129a675b29d6525b6cf9a4b955cf"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">(</span></span><span class="kt"><span class="pre">int</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">basename_template_functor</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions25basename_template_functorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A functor which will be applied on an incremented counter. </p>
<p>The result will be inserted into the basename_template in place of {i}.</p>
<p>This can be used, for example, to left-pad the file counter. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_open_filesE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions14max_open_filesE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions14max_open_filesE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::max_open_files__uint32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1aeb95e6ed9cf11af0787a51f689429a9a"></span><span class="n"><span class="pre">uint32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">max_open_files</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">900</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_open_filesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If greater than 0 then this will limit the maximum number of files that can be left open. </p>
<p>If an attempt is made to open too many files then the least recently used file will be closed. If this setting is set too low you may end up fragmenting your data into many small files.</p>
<p>The default is 900, which also allows some number of files to be open by the scanner before hitting the default Linux limit of 1024. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17max_rows_per_fileE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions17max_rows_per_fileE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions17max_rows_per_fileE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::max_rows_per_file__uint64_t"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1aedfda9ae74c15a8bc1195da5edeaf4c0"></span><span class="n"><span class="pre">uint64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">max_rows_per_file</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17max_rows_per_fileE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If greater than 0 then this will limit how many rows are placed in any single file. </p>
<p>Otherwise there will be no limit and one file will be created in each output directory unless files need to be closed to respect max_open_files. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18min_rows_per_groupE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions18min_rows_per_groupE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions18min_rows_per_groupE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::min_rows_per_group__uint64_t"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a791ed04a5c65f729efeb4a58c206682d"></span><span class="n"><span class="pre">uint64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">min_rows_per_group</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18min_rows_per_groupE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If greater than 0 then this will cause the dataset writer to batch incoming data and only write the row groups to the disk when sufficient rows have accumulated. </p>
<p>The final row group size may be less than this value, and other options such as <code class="docutils literal notranslate"><span class="pre">max_open_files</span></code> or <code class="docutils literal notranslate"><span class="pre">max_rows_per_file</span></code> may lead to smaller row group sizes. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18max_rows_per_groupE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions18max_rows_per_groupE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions18max_rows_per_groupE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::max_rows_per_group__uint64_t"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a1f4c0ae71588e4087bb4617f76aa7a46"></span><span class="n"><span class="pre">uint64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">max_rows_per_group</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><span class="w"> </span><span class="o"><span class="pre">&lt;&lt;</span></span><span class="w"> </span><span class="m"><span class="pre">20</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18max_rows_per_groupE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If greater than 0 then the dataset writer may split up large incoming batches into multiple row groups. </p>
<p>If this value is set then min_rows_per_group should also be set or else you may end up with very small row groups (e.g. if the incoming row group size is just barely larger than this value). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions22existing_data_behaviorE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions22existing_data_behaviorE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions22existing_data_behaviorE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::existing_data_behavior__ExistingDataBehavior"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1af18dfccff85b3fe5df0a5e83119fd3b3"></span><span class="n"><span class="pre">ExistingDataBehavior</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">existing_data_behavior</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">ExistingDataBehavior</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">kError</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions22existing_data_behaviorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Controls what happens if an output directory already exists. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10create_dirE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions10create_dirE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions10create_dirE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::create_dir__b"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1aede500e700966b9c88f399fea2a6336f"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">create_dir</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">true</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10create_dirE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If false the dataset writer will not create directories. This is mainly intended for filesystems that do not require directories, such as S3. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17writer_pre_finishE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions17writer_pre_finishE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions17writer_pre_finishE"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a29684d31797cfff4840ce4244e253f6a"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="p"><span class="pre">(</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">*</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">writer_pre_finish</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><code class="docutils literal notranslate"><span class="pre">[](FileWriter*)</span> <span class="pre">{</span> <span class="pre">return</span> <span class="pre">Status::OK();</span> <span class="pre">}</span></code><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17writer_pre_finishE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Callback to be invoked against all FileWriters before they are finalized with <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_writer_1a1e5400135956180accdaf369053b56a8"><span class="std std-ref">FileWriter::Finish()</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18writer_post_finishE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions18writer_post_finishE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions18writer_post_finishE"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a44c84a8f3ae03075d4552f0195442a07"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="p"><span class="pre">(</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">*</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">writer_post_finish</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><code class="docutils literal notranslate"><span class="pre">[](FileWriter*)</span> <span class="pre">{</span> <span class="pre">return</span> <span class="pre">Status::OK();</span> <span class="pre">}</span></code><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18writer_post_finishE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Callback to be invoked against all FileWriters after they have called <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_writer_1a1e5400135956180accdaf369053b56a8"><span class="std std-ref">FileWriter::Finish()</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16WriteNodeOptionsE">
<span id="_CPPv3N5arrow7dataset16WriteNodeOptionsE"></span><span id="_CPPv2N5arrow7dataset16WriteNodeOptionsE"></span><span id="arrow::dataset::WriteNodeOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_write_node_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">WriteNodeOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">acero</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="acero.html#_CPPv4N5arrow5acero15ExecNodeOptionsE" title="arrow::acero::ExecNodeOptions"><span class="n"><span class="pre">ExecNodeOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset16WriteNodeOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_base.h&gt;</em></div>
<p>Wraps <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_dataset_write_options"><span class="std std-ref">FileSystemDatasetWriteOptions</span></a> for consumption as acero::ExecNodeOptions. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16WriteNodeOptions13write_optionsE">
<span id="_CPPv3N5arrow7dataset16WriteNodeOptions13write_optionsE"></span><span id="_CPPv2N5arrow7dataset16WriteNodeOptions13write_optionsE"></span><span id="arrow::dataset::WriteNodeOptions::write_options__FileSystemDatasetWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_write_node_options_1a306e717d6e747e127108f73fc02c9035"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><span class="n"><span class="pre">FileSystemDatasetWriteOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">write_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16WriteNodeOptions13write_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options to control how to write the dataset. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16WriteNodeOptions13custom_schemaE">
<span id="_CPPv3N5arrow7dataset16WriteNodeOptions13custom_schemaE"></span><span id="_CPPv2N5arrow7dataset16WriteNodeOptions13custom_schemaE"></span><span id="arrow::dataset::WriteNodeOptions::custom_schema__std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_write_node_options_1aeb112f4334c1828094a166a4bf511abc"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">custom_schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16WriteNodeOptions13custom_schemaE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Optional schema to attach to all written batches. </p>
<p>By default, we will use the output schema of the input.</p>
<p>This can be used to alter schema metadata, field nullability, or field metadata. However, this cannot be used to change the type of data. If the custom schema does not have the same number of fields and the same data types as the input then the plan will fail. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16WriteNodeOptions15custom_metadataE">
<span id="_CPPv3N5arrow7dataset16WriteNodeOptions15custom_metadataE"></span><span id="_CPPv2N5arrow7dataset16WriteNodeOptions15custom_metadataE"></span><span id="arrow::dataset::WriteNodeOptions::custom_metadata__std::shared_ptr:KeyValueMetadataC:"></span><span class="target" id="classarrow_1_1dataset_1_1_write_node_options_1a26144a14fa59b0049ad85c4e499f882f"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow16KeyValueMetadataE" title="arrow::KeyValueMetadata"><span class="n"><span class="pre">KeyValueMetadata</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">custom_metadata</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16WriteNodeOptions15custom_metadataE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Optional metadata to attach to written batches. </p>
</dd></dl>
</div>
</dd></dl>
</section>
<section id="file-formats">
<h3>File Formats<a class="headerlink" href="#file-formats" title="Permalink to this heading">#</a></h3>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv412kIpcTypeName">
<span id="_CPPv312kIpcTypeName"></span><span id="_CPPv212kIpcTypeName"></span><span id="kIpcTypeName__cA"></span><span class="target" id="group__dataset-file-formats_1ga4f5f2a25385977113858d4ecc7d0f2bf"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kIpcTypeName</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;ipc&quot;</span></span><a class="headerlink" href="#_CPPv412kIpcTypeName" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv413kJsonTypeName">
<span id="_CPPv313kJsonTypeName"></span><span id="_CPPv213kJsonTypeName"></span><span id="kJsonTypeName__cA"></span><span class="target" id="group__dataset-file-formats_1ga1ca1fd6a196b8f57fc31f1143df8c357"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kJsonTypeName</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;json&quot;</span></span><a class="headerlink" href="#_CPPv413kJsonTypeName" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv412kOrcTypeName">
<span id="_CPPv312kOrcTypeName"></span><span id="_CPPv212kOrcTypeName"></span><span id="kOrcTypeName__cA"></span><span class="target" id="group__dataset-file-formats_1gaf4cf8898498781bfbe971a2238f574c9"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kOrcTypeName</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;orc&quot;</span></span><a class="headerlink" href="#_CPPv412kOrcTypeName" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv416kParquetTypeName">
<span id="_CPPv316kParquetTypeName"></span><span id="_CPPv216kParquetTypeName"></span><span id="kParquetTypeName__cA"></span><span class="target" id="group__dataset-file-formats_1gac995309ed7d81dcc769cd4d2b5419021"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kParquetTypeName</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;parquet&quot;</span></span><a class="headerlink" href="#_CPPv416kParquetTypeName" title="Permalink to this definition">#</a><br /></dt>
<dd></dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13CsvFileFormatE">
<span id="_CPPv3N5arrow7dataset13CsvFileFormatE"></span><span id="_CPPv2N5arrow7dataset13CsvFileFormatE"></span><span id="arrow::dataset::CsvFileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CsvFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13CsvFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_csv.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format"><span class="std std-ref">FileFormat</span></a> implementation that reads from and writes to CSV files. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13CsvFileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset13CsvFileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset13CsvFileFormat9type_nameEv"></span><span id="arrow::dataset::CsvFileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1a956c86037c5f93e63599b0d40ef41150"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13CsvFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13CsvFileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13CsvFileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13CsvFileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::CsvFileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1a8d61c4b40c17a1201d7fe1d03702d07f"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13CsvFileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13CsvFileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13CsvFileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13CsvFileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::CsvFileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1ada6acb893f92cb2e672f8d1b63370b26"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13CsvFileFormat7InspectERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13CsvFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3NK5arrow7dataset13CsvFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2NK5arrow7dataset13CsvFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::CsvFileFormat::InspectFragment__FileSourceCR.FragmentScanOptionsCP.compute::ExecContextPC"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1af114bc2fa5a5a2eb575f94d84649cd60"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span 
class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13CsvFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Learn what we need about the file before we start scanning it. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13CsvFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset13CsvFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset13CsvFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::CsvFileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1a97946721552c48c67d0310ea99cf63d3"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span 
class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13CsvFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13CsvFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset13CsvFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset13CsvFileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::CsvFileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1ab3d7ef9b527dfc378093f431e267c287"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13CsvFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return a null shared_ptr if this file format does not yet support writing datasets. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13CsvFileFormat13parse_optionsE">
<span id="_CPPv3N5arrow7dataset13CsvFileFormat13parse_optionsE"></span><span id="_CPPv2N5arrow7dataset13CsvFileFormat13parse_optionsE"></span><span id="arrow::dataset::CsvFileFormat::parse_options__csv::ParseOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_format_1a0f15310877223a9f0c3b26e1bf7e6eec"></span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptionsE" title="arrow::csv::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">parse_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptionsE" title="arrow::csv::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptions8DefaultsEv" title="arrow::csv::ParseOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13CsvFileFormat13parse_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options affecting the parsing of CSV files. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22CsvFragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset22CsvFragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset22CsvFragmentScanOptionsE"></span><span id="arrow::dataset::CsvFragmentScanOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_csv_fragment_scan_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CsvFragmentScanOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset22CsvFragmentScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_csv.h&gt;</em></div>
<p>Per-scan options for CSV fragments. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members-csvfragmentscanoptions">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22CsvFragmentScanOptions15convert_optionsE">
<span id="_CPPv3N5arrow7dataset22CsvFragmentScanOptions15convert_optionsE"></span><span id="_CPPv2N5arrow7dataset22CsvFragmentScanOptions15convert_optionsE"></span><span id="arrow::dataset::CsvFragmentScanOptions::convert_options__csv::ConvertOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_csv_fragment_scan_options_1a9e0ad6af7d8884a16a4f2c66dbfd77a5"></span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv14ConvertOptionsE" title="arrow::csv::ConvertOptions"><span class="n"><span class="pre">ConvertOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">convert_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv14ConvertOptionsE" title="arrow::csv::ConvertOptions"><span class="n"><span class="pre">ConvertOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv14ConvertOptions8DefaultsEv" title="arrow::csv::ConvertOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22CsvFragmentScanOptions15convert_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>CSV conversion options. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22CsvFragmentScanOptions12read_optionsE">
<span id="_CPPv3N5arrow7dataset22CsvFragmentScanOptions12read_optionsE"></span><span id="_CPPv2N5arrow7dataset22CsvFragmentScanOptions12read_optionsE"></span><span id="arrow::dataset::CsvFragmentScanOptions::read_options__csv::ReadOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_csv_fragment_scan_options_1ad1658cb67d0dfbf075fd9ac10614ad7c"></span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv11ReadOptionsE" title="arrow::csv::ReadOptions"><span class="n"><span class="pre">ReadOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">read_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv11ReadOptionsE" title="arrow::csv::ReadOptions"><span class="n"><span class="pre">ReadOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv11ReadOptions8DefaultsEv" title="arrow::csv::ReadOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22CsvFragmentScanOptions12read_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>CSV reading options. </p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">use_threads</span></code> is always ignored. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22CsvFragmentScanOptions13parse_optionsE">
<span id="_CPPv3N5arrow7dataset22CsvFragmentScanOptions13parse_optionsE"></span><span id="_CPPv2N5arrow7dataset22CsvFragmentScanOptions13parse_optionsE"></span><span id="arrow::dataset::CsvFragmentScanOptions::parse_options__csv::ParseOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_csv_fragment_scan_options_1a3dc871290cb2912aece46fe4e7da7f90"></span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptionsE" title="arrow::csv::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">parse_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptionsE" title="arrow::csv::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12ParseOptions8DefaultsEv" title="arrow::csv::ParseOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22CsvFragmentScanOptions13parse_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>CSV parse options. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22CsvFragmentScanOptions21stream_transform_funcE">
<span id="_CPPv3N5arrow7dataset22CsvFragmentScanOptions21stream_transform_funcE"></span><span id="_CPPv2N5arrow7dataset22CsvFragmentScanOptions21stream_transform_funcE"></span><span id="arrow::dataset::CsvFragmentScanOptions::stream_transform_func__StreamWrapFunc"></span><span class="target" id="structarrow_1_1dataset_1_1_csv_fragment_scan_options_1a0642ef22ed85933ef492c1246ff010e7"></span><span class="n"><span class="pre">StreamWrapFunc</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">stream_transform_func</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22CsvFragmentScanOptions21stream_transform_funcE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Optional stream wrapping function. </p>
<p>If defined, all open dataset file fragments will be passed through this function. One possible use case is to transparently transcode all input files from a given character set to UTF-8. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19CsvFileWriteOptionsE">
<span id="_CPPv3N5arrow7dataset19CsvFileWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset19CsvFileWriteOptionsE"></span><span id="arrow::dataset::CsvFileWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_write_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CsvFileWriteOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset19CsvFileWriteOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_csv.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members-csvfilewriteoptions">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19CsvFileWriteOptions13write_optionsE">
<span id="_CPPv3N5arrow7dataset19CsvFileWriteOptions13write_optionsE"></span><span id="_CPPv2N5arrow7dataset19CsvFileWriteOptions13write_optionsE"></span><span id="arrow::dataset::CsvFileWriteOptions::write_options__std::shared_ptr:csv::WriteOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_write_options_1a0a06c0f02f3c8a6f86b17d5f3b00f18c"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">csv</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow3csv12WriteOptionsE" title="arrow::csv::WriteOptions"><span class="n"><span class="pre">WriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">write_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19CsvFileWriteOptions13write_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options passed to <a class="reference internal" href="formats.html#group__csv-writer-factories_1gaf3087d8c3da92b5c544d6e43fedc1a96"><span class="std std-ref">csv::MakeCSVWriter</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13CsvFileWriterE">
<span id="_CPPv3N5arrow7dataset13CsvFileWriterE"></span><span id="_CPPv2N5arrow7dataset13CsvFileWriterE"></span><span id="arrow::dataset::CsvFileWriter"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_writer"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">CsvFileWriter</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13CsvFileWriterE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_csv.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13CsvFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE">
<span id="_CPPv3N5arrow7dataset13CsvFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="_CPPv2N5arrow7dataset13CsvFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="arrow::dataset::CsvFileWriter::Write__std::shared_ptr:RecordBatch:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_csv_file_writer_1a3511c26bb09507a3eae5bfebf6a3fde3"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><span class="n"><span class="pre">RecordBatch</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">batch</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13CsvFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write the given batch. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13IpcFileFormatE">
<span id="_CPPv3N5arrow7dataset13IpcFileFormatE"></span><span id="_CPPv2N5arrow7dataset13IpcFileFormatE"></span><span id="arrow::dataset::IpcFileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IpcFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13IpcFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_ipc.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format"><span class="std std-ref">FileFormat</span></a> implementation that reads from and writes to Arrow IPC files. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions-ipcfileformat">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13IpcFileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset13IpcFileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset13IpcFileFormat9type_nameEv"></span><span id="arrow::dataset::IpcFileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format_1a19d3016dad597fbd87ebca2528794338"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13IpcFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13IpcFileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13IpcFileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13IpcFileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::IpcFileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format_1a7a143ae018f520857453cf983f9a53bf"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13IpcFileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13IpcFileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13IpcFileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13IpcFileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::IpcFileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format_1a3d3cbfd28ab8ce58cc93b18d36c4a93a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13IpcFileFormat7InspectERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13IpcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset13IpcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset13IpcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::IpcFileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format_1a8e6868a6144ddf24089f9ccb8895f7c4"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span 
class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13IpcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13IpcFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset13IpcFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset13IpcFileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::IpcFileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_format_1a6f1f5c5754c5e94ff69e6b708c013307"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13IpcFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return null shared_ptr if this file format does not yet support writing datasets. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22IpcFragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset22IpcFragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset22IpcFragmentScanOptionsE"></span><span id="arrow::dataset::IpcFragmentScanOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_fragment_scan_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IpcFragmentScanOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset22IpcFragmentScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_ipc.h&gt;</em></div>
<p>Per-scan options for IPC fragments. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22IpcFragmentScanOptions7optionsE">
<span id="_CPPv3N5arrow7dataset22IpcFragmentScanOptions7optionsE"></span><span id="_CPPv2N5arrow7dataset22IpcFragmentScanOptions7optionsE"></span><span id="arrow::dataset::IpcFragmentScanOptions::options__std::shared_ptr:ipc::IpcReadOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_fragment_scan_options_1a7db62a4708d0ed9da34c20d24afdd5e1"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ipc</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="ipc.html#_CPPv4N5arrow3ipc14IpcReadOptionsE" title="arrow::ipc::IpcReadOptions"><span class="n"><span class="pre">IpcReadOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22IpcFragmentScanOptions7optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options passed to the IPC file reader. </p>
<p>The included_fields, memory_pool, and use_threads settings are ignored. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset22IpcFragmentScanOptions13cache_optionsE">
<span id="_CPPv3N5arrow7dataset22IpcFragmentScanOptions13cache_optionsE"></span><span id="_CPPv2N5arrow7dataset22IpcFragmentScanOptions13cache_optionsE"></span><span id="arrow::dataset::IpcFragmentScanOptions::cache_options__std::shared_ptr:io::CacheOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_fragment_scan_options_1a877bf054973b1eac5858d7f40d20b8c2"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">CacheOptions</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">cache_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset22IpcFragmentScanOptions13cache_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>If present, the async scanner will enable I/O coalescing. </p>
<p>This is ignored by the sync scanner. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19IpcFileWriteOptionsE">
<span id="_CPPv3N5arrow7dataset19IpcFileWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset19IpcFileWriteOptionsE"></span><span id="arrow::dataset::IpcFileWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_write_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IpcFileWriteOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset19IpcFileWriteOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_ipc.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19IpcFileWriteOptions7optionsE">
<span id="_CPPv3N5arrow7dataset19IpcFileWriteOptions7optionsE"></span><span id="_CPPv2N5arrow7dataset19IpcFileWriteOptions7optionsE"></span><span id="arrow::dataset::IpcFileWriteOptions::options__std::shared_ptr:ipc::IpcWriteOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_write_options_1a3203bcdf33861289a907e60617226240"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ipc</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="ipc.html#_CPPv4N5arrow3ipc15IpcWriteOptionsE" title="arrow::ipc::IpcWriteOptions"><span class="n"><span class="pre">IpcWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19IpcFileWriteOptions7optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options passed to <a class="reference internal" href="ipc.html#group__record-batch-writer-factories_1gaa2cfff8a11418dcdd6bb3a6ce94b3344"><span class="std std-ref">ipc::MakeFileWriter</span></a>. The use_threads setting is ignored. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19IpcFileWriteOptions8metadataE">
<span id="_CPPv3N5arrow7dataset19IpcFileWriteOptions8metadataE"></span><span id="_CPPv2N5arrow7dataset19IpcFileWriteOptions8metadataE"></span><span id="arrow::dataset::IpcFileWriteOptions::metadata__std::shared_ptr:KeyValueMetadataC:"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_write_options_1aa8b403b52cc01a5a5a5831ee549b9d47"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="datatype.html#_CPPv4N5arrow16KeyValueMetadataE" title="arrow::KeyValueMetadata"><span class="n"><span class="pre">KeyValueMetadata</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">metadata</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19IpcFileWriteOptions8metadataE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The custom_metadata written to the file’s footer. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13IpcFileWriterE">
<span id="_CPPv3N5arrow7dataset13IpcFileWriterE"></span><span id="_CPPv2N5arrow7dataset13IpcFileWriterE"></span><span id="arrow::dataset::IpcFileWriter"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_writer"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IpcFileWriter</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13IpcFileWriterE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_ipc.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13IpcFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE">
<span id="_CPPv3N5arrow7dataset13IpcFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="_CPPv2N5arrow7dataset13IpcFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="arrow::dataset::IpcFileWriter::Write__std::shared_ptr:RecordBatch:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_ipc_file_writer_1a16d4003a52404a1f0665ea836232dd6a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><span class="n"><span class="pre">RecordBatch</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">batch</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13IpcFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write the given batch. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14JsonFileFormatE">
<span id="_CPPv3N5arrow7dataset14JsonFileFormatE"></span><span id="_CPPv2N5arrow7dataset14JsonFileFormatE"></span><span id="arrow::dataset::JsonFileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">JsonFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset14JsonFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_json.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format"><span class="std std-ref">FileFormat</span></a> implementation that reads from JSON files. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14JsonFileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset14JsonFileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset14JsonFileFormat9type_nameEv"></span><span id="arrow::dataset::JsonFileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1ae95f6bb2fe429f940e489c0e02e36365"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14JsonFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14JsonFileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset14JsonFileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset14JsonFileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::JsonFileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1ae2f12ae9f0490f6d8150c310edc57dba"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14JsonFileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14JsonFileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset14JsonFileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset14JsonFileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::JsonFileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1ae20cfbcf9842647ca46918320235aa14"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14JsonFileFormat7InspectERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14JsonFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE">
<span id="_CPPv3NK5arrow7dataset14JsonFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="_CPPv2NK5arrow7dataset14JsonFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE"></span><span id="arrow::dataset::JsonFileFormat::InspectFragment__FileSourceCR.FragmentScanOptionsCP.compute::ExecContextPC"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1a3ab68192b457ea54748652a56688ba4f"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="async.html#_CPPv4I0EN5arrow6FutureE" title="arrow::Future"><span class="n"><span class="pre">Future</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">InspectedFragment</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectFragment</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span 
class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">format_options</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">ExecContext</span></span><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">exec_context</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14JsonFileFormat15InspectFragmentERK10FileSourcePK19FragmentScanOptionsPN7compute11ExecContextE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Learn what we need about the file before we start scanning it. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14JsonFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset14JsonFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset14JsonFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::JsonFileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1a6b0c9502d674bb8f4b81503c2774366a"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" 
href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14JsonFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" 
title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14JsonFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset14JsonFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset14JsonFileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::JsonFileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_json_file_format_1a3293a46b4ff827ec18d10c5b3bbf5b8a"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14JsonFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return a null <code class="docutils literal notranslate"><span class="pre">shared_ptr</span></code> if this file format does not yet support writing datasets. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23JsonFragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset23JsonFragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset23JsonFragmentScanOptionsE"></span><span id="arrow::dataset::JsonFragmentScanOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_json_fragment_scan_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">JsonFragmentScanOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset23JsonFragmentScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_json.h&gt;</em></div>
<p>Per-scan options for JSON fragments. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23JsonFragmentScanOptions13parse_optionsE">
<span id="_CPPv3N5arrow7dataset23JsonFragmentScanOptions13parse_optionsE"></span><span id="_CPPv2N5arrow7dataset23JsonFragmentScanOptions13parse_optionsE"></span><span id="arrow::dataset::JsonFragmentScanOptions::parse_options__json::ParseOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_json_fragment_scan_options_1ac94f03cb533f6a439d740501e6cd6b19"></span><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json12ParseOptionsE" title="arrow::json::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">parse_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json12ParseOptionsE" title="arrow::json::ParseOptions"><span class="n"><span class="pre">ParseOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json12ParseOptions8DefaultsEv" title="arrow::json::ParseOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset23JsonFragmentScanOptions13parse_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options that affect JSON parsing. </p>
<p>Note: <code class="docutils literal notranslate"><span class="pre">explicit_schema</span></code> and <code class="docutils literal notranslate"><span class="pre">unexpected_field_behavior</span></code> are ignored. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23JsonFragmentScanOptions12read_optionsE">
<span id="_CPPv3N5arrow7dataset23JsonFragmentScanOptions12read_optionsE"></span><span id="_CPPv2N5arrow7dataset23JsonFragmentScanOptions12read_optionsE"></span><span id="arrow::dataset::JsonFragmentScanOptions::read_options__json::ReadOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_json_fragment_scan_options_1a18a12293b547da5dea9691b4f96801be"></span><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json11ReadOptionsE" title="arrow::json::ReadOptions"><span class="n"><span class="pre">ReadOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">read_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json11ReadOptionsE" title="arrow::json::ReadOptions"><span class="n"><span class="pre">ReadOptions</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N5arrow4json11ReadOptions8DefaultsEv" title="arrow::json::ReadOptions::Defaults"><span class="n"><span class="pre">Defaults</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset23JsonFragmentScanOptions12read_optionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Options that affect JSON reading. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13OrcFileFormatE">
<span id="_CPPv3N5arrow7dataset13OrcFileFormatE"></span><span id="_CPPv2N5arrow7dataset13OrcFileFormatE"></span><span id="arrow::dataset::OrcFileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">OrcFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset13OrcFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_orc.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format"><span class="std std-ref">FileFormat</span></a> implementation that reads from and writes to ORC files. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13OrcFileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset13OrcFileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset13OrcFileFormat9type_nameEv"></span><span id="arrow::dataset::OrcFileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format_1a1d7eeea9ff17fcf6afc21c8497dec80f"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13OrcFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13OrcFileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13OrcFileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13OrcFileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::OrcFileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format_1a28ff98b8e44402c2e6cd8faec3ae8401"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13OrcFileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13OrcFileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset13OrcFileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset13OrcFileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::OrcFileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format_1a0dbb16f87e094444cfa7a98f8300031a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13OrcFileFormat7InspectERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset13OrcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset13OrcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset13OrcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::OrcFileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format_1a02201b1d9e02394461dd277a8c5cbd6e"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span 
class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset13OrcFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13OrcFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset13OrcFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset13OrcFileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::OrcFileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_orc_file_format_1a760b29e2c3854db5f0ab1bf8d2ef7f28"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13OrcFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return a null <code class="docutils literal notranslate"><span class="pre">shared_ptr</span></code> if this file format does not yet support writing datasets. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormatE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormatE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormatE"></span><span id="arrow::dataset::ParquetFileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format"><span class="std std-ref">FileFormat</span></a> implementation that reads from Parquet files. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat17ParquetFileFormatERKN7parquet16ReaderPropertiesE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat17ParquetFileFormatERKN7parquet16ReaderPropertiesE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat17ParquetFileFormatERKN7parquet16ReaderPropertiesE"></span><span id="arrow::dataset::ParquetFileFormat::ParquetFileFormat__parquet::ReaderPropertiesCR"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1ad901dddc3300c6fa70139915e6ad546d"></span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFileFormat</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet16ReaderPropertiesE" title="parquet::ReaderProperties"><span class="n"><span class="pre">ReaderProperties</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">reader_properties</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat17ParquetFileFormatERKN7parquet16ReaderPropertiesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Convenience constructor which copies properties from a <a class="reference internal" href="formats.html#classparquet_1_1_reader_properties"><span class="std std-ref">parquet::ReaderProperties</span></a>. </p>
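<p>The <code class="docutils literal notranslate"><span class="pre">memory_pool</span></code> of the given properties will be ignored. </p>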
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17ParquetFileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset17ParquetFileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset17ParquetFileFormat9type_nameEv"></span><span id="arrow::dataset::ParquetFileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1aaae04966393cc5cd4df9258c899d619c"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17ParquetFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17ParquetFileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset17ParquetFileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset17ParquetFileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::ParquetFileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a841f1cc6362ebc7b769998485e698229"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17ParquetFileFormat11IsSupportedERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17ParquetFileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset17ParquetFileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset17ParquetFileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::ParquetFileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a1b86068edfb07a50b1f69a40ae6c46c7"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17ParquetFileFormat7InspectERK10FileSource" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::ParquetFileFormat::MakeFragment__FileSource.compute::Expression.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1abc6a673cf4b3e227dcac3dc3ea523b6b"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> 
</span><span class="n sig-param"><span class="pre">partition_expression</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">physical_schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> targeting all RowGroups. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEENSt6vectorIiEE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEENSt6vectorIiEE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEENSt6vectorIiEE"></span><span id="arrow::dataset::ParquetFileFormat::MakeFragment__FileSource.compute::Expression.std::shared_ptr:Schema:.std::vector:i:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1ab1425a8257b6d0efcba2cc2aeb5be37c"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19ParquetFileFragmentE" title="arrow::dataset::ParquetFileFragment"><span class="n"><span class="pre">ParquetFileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span 
class="n sig-param"><span class="pre">partition_expression</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">physical_schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">int</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">row_groups</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionENSt10shared_ptrI6SchemaEENSt6vectorIiEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>, restricted to the specified row groups. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17ParquetFileFormat9GetReaderERK10FileSourceRKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3NK5arrow7dataset17ParquetFileFormat9GetReaderERK10FileSourceRKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2NK5arrow7dataset17ParquetFileFormat9GetReaderERK10FileSourceRKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::ParquetFileFormat::GetReader__FileSourceCR.std::shared_ptr:ScanOptions:CRC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a2fae15b7f349b4131daac90c75205bb8"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><span class="n"><span class="pre">FileReader</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetReader</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span 
class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17ParquetFileFormat9GetReaderERK10FileSourceRKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a FileReader on the given source. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17ParquetFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE">
<span id="_CPPv3NK5arrow7dataset17ParquetFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="_CPPv2NK5arrow7dataset17ParquetFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE"></span><span id="arrow::dataset::ParquetFileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:.fs::FileLocatorC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a8897edcd44eb2b494aae8504ccc30eae"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" 
title="arrow::io::OutputStream"><span class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17ParquetFileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEEN2fs11FileLocatorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::ParquetFileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a17a9b334cc99b818024078ba6cacb77e"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
<p>May return a null shared_ptr if this file format does not yet support writing datasets. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp">
<span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1aa4fe4dcef6f021d90b3ed929c6d2a06f"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">partition_expression</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span 
class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">physical_schema</span></span><span class="sig-paren">)</span><br /></dt>
<dd><p>Open a fragment. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE"></span><span id="arrow::dataset::ParquetFileFormat::MakeFragment__FileSource.compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a3c0befdc51c1c2d9a561107ecd02007c"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">partition_expression</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::ParquetFileFormat::MakeFragment__FileSource.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_format_1a9e605ca731c0c18f2d34291aedeadb30"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span 
class="n sig-param"><span class="pre">physical_schema</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
</div>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileFormat13ReaderOptionsE">
<span id="_CPPv3N5arrow7dataset17ParquetFileFormat13ReaderOptionsE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileFormat13ReaderOptionsE"></span><span id="arrow::dataset::ParquetFileFormat::ReaderOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_parquet_file_format_1_1_reader_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReaderOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileFormat13ReaderOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
</dd></dl>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19ParquetFileFragmentE">
<span id="_CPPv3N5arrow7dataset19ParquetFileFragmentE"></span><span id="_CPPv2N5arrow7dataset19ParquetFileFragmentE"></span><span id="arrow::dataset::ParquetFileFragment"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFileFragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset19ParquetFileFragmentE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> with Parquet logic. </p>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_fragment"><span class="std std-ref">ParquetFileFragment</span></a> provides a lazy (with respect to IO) interface to scan Parquet files. Any heavy IO calls are deferred to the Scan() method.</p>
<p>The caller can provide an optional list of selected RowGroups to limit the number of scanned RowGroups, or to partition the scans across multiple threads.</p>
<p>Metadata can be explicitly provided, enabling predicate pushdown benefits without the potentially heavy IO of loading metadata from the file system. This can yield a significant performance boost when scanning high-latency file systems. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19ParquetFileFragment10row_groupsEv">
<span id="_CPPv3NK5arrow7dataset19ParquetFileFragment10row_groupsEv"></span><span id="_CPPv2NK5arrow7dataset19ParquetFileFragment10row_groupsEv"></span><span id="arrow::dataset::ParquetFileFragment::row_groupsC"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_fragment_1abe767e20455e8121abfd00455492a443"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">int</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">row_groups</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19ParquetFileFragment10row_groupsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the RowGroups selected by this fragment. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19ParquetFileFragment8metadataEv">
<span id="_CPPv3N5arrow7dataset19ParquetFileFragment8metadataEv"></span><span id="_CPPv2N5arrow7dataset19ParquetFileFragment8metadataEv"></span><span id="arrow::dataset::ParquetFileFragment::metadata"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_fragment_1ab3d1b7b0901b482257e1dedf2a217fee"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileMetaData</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">metadata</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset19ParquetFileFragment8metadataEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the FileMetaData associated with this fragment. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19ParquetFileFragment22EnsureCompleteMetadataEPN7parquet5arrow10FileReaderE">
<span id="_CPPv3N5arrow7dataset19ParquetFileFragment22EnsureCompleteMetadataEPN7parquet5arrow10FileReaderE"></span><span id="_CPPv2N5arrow7dataset19ParquetFileFragment22EnsureCompleteMetadataEPN7parquet5arrow10FileReaderE"></span><span id="arrow::dataset::ParquetFileFragment::EnsureCompleteMetadata__parquet::arrow::FileReaderP"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_fragment_1a945228799ae4ce6eab9c72cccffd780f"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnsureCompleteMetadata</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet5arrow10FileReaderE" title="parquet::arrow::FileReader"><span class="n"><span class="pre">FileReader</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">reader</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset19ParquetFileFragment22EnsureCompleteMetadataEPN7parquet5arrow10FileReaderE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Ensure this fragment’s FileMetaData is in memory. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19ParquetFileFragment6SubsetEN7compute10ExpressionE">
<span id="_CPPv3N5arrow7dataset19ParquetFileFragment6SubsetEN7compute10ExpressionE"></span><span id="_CPPv2N5arrow7dataset19ParquetFileFragment6SubsetEN7compute10ExpressionE"></span><span id="arrow::dataset::ParquetFileFragment::Subset__compute::Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_fragment_1ae778c258f7660a8b68470b06652a9a5a"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Subset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">compute</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><span class="n"><span class="pre">Expression</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset19ParquetFileFragment6SubsetEN7compute10ExpressionE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return a fragment which selects a filtered subset of this fragment’s RowGroups. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26ParquetFragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset26ParquetFragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset26ParquetFragmentScanOptionsE"></span><span id="arrow::dataset::ParquetFragmentScanOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_fragment_scan_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFragmentScanOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset26ParquetFragmentScanOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<p>Per-scan options for Parquet fragments. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26ParquetFragmentScanOptions17reader_propertiesE">
<span id="_CPPv3N5arrow7dataset26ParquetFragmentScanOptions17reader_propertiesE"></span><span id="_CPPv2N5arrow7dataset26ParquetFragmentScanOptions17reader_propertiesE"></span><span id="arrow::dataset::ParquetFragmentScanOptions::reader_properties__std::shared_ptr:parquet::ReaderProperties:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_fragment_scan_options_1a3e1d8b356c7006cfad2ffe72f2f07820"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet16ReaderPropertiesE" title="parquet::ReaderProperties"><span class="n"><span class="pre">ReaderProperties</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">reader_properties</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26ParquetFragmentScanOptions17reader_propertiesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Reader properties. </p>
<p>Not all properties are respected: memory_pool comes from <a class="reference internal" href="#structarrow_1_1dataset_1_1_scan_options"><span class="std std-ref">ScanOptions</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26ParquetFragmentScanOptions23arrow_reader_propertiesE">
<span id="_CPPv3N5arrow7dataset26ParquetFragmentScanOptions23arrow_reader_propertiesE"></span><span id="_CPPv2N5arrow7dataset26ParquetFragmentScanOptions23arrow_reader_propertiesE"></span><span id="arrow::dataset::ParquetFragmentScanOptions::arrow_reader_properties__std::shared_ptr:parquet::ArrowReaderProperties:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_fragment_scan_options_1a229ddd0e9e93f063281e76243a357e89"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet21ArrowReaderPropertiesE" title="parquet::ArrowReaderProperties"><span class="n"><span class="pre">ArrowReaderProperties</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">arrow_reader_properties</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26ParquetFragmentScanOptions23arrow_reader_propertiesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Arrow reader properties. </p>
<p>Not all properties are respected: batch_size comes from <a class="reference internal" href="#structarrow_1_1dataset_1_1_scan_options"><span class="std std-ref">ScanOptions</span></a>. Additionally, dictionary columns come from ParquetFileFormat::ReaderOptions::dict_columns. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26ParquetFragmentScanOptions25parquet_decryption_configE">
<span id="_CPPv3N5arrow7dataset26ParquetFragmentScanOptions25parquet_decryption_configE"></span><span id="_CPPv2N5arrow7dataset26ParquetFragmentScanOptions25parquet_decryption_configE"></span><span id="arrow::dataset::ParquetFragmentScanOptions::parquet_decryption_config__std::shared_ptr:ParquetDecryptionConfig:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_fragment_scan_options_1a68b3181cefb396b0ddb3d280ab964dad"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ParquetDecryptionConfig</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">parquet_decryption_config</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26ParquetFragmentScanOptions25parquet_decryption_configE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>A configuration structure that provides decryption properties for a dataset. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23ParquetFileWriteOptionsE">
<span id="_CPPv3N5arrow7dataset23ParquetFileWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset23ParquetFileWriteOptionsE"></span><span id="arrow::dataset::ParquetFileWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_write_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFileWriteOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset23ParquetFileWriteOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23ParquetFileWriteOptions17writer_propertiesE">
<span id="_CPPv3N5arrow7dataset23ParquetFileWriteOptions17writer_propertiesE"></span><span id="_CPPv2N5arrow7dataset23ParquetFileWriteOptions17writer_propertiesE"></span><span id="arrow::dataset::ParquetFileWriteOptions::writer_properties__std::shared_ptr:parquet::WriterProperties:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_write_options_1a0873fc9084d492f07824d7c61416c887"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet16WriterPropertiesE" title="parquet::WriterProperties"><span class="n"><span class="pre">WriterProperties</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">writer_properties</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset23ParquetFileWriteOptions17writer_propertiesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parquet writer properties. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset23ParquetFileWriteOptions23arrow_writer_propertiesE">
<span id="_CPPv3N5arrow7dataset23ParquetFileWriteOptions23arrow_writer_propertiesE"></span><span id="_CPPv2N5arrow7dataset23ParquetFileWriteOptions23arrow_writer_propertiesE"></span><span id="arrow::dataset::ParquetFileWriteOptions::arrow_writer_properties__std::shared_ptr:parquet::ArrowWriterProperties:"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_write_options_1aa49d51aa356fa59b47029f1f95bf8fec"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">parquet</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="formats.html#_CPPv4N7parquet21ArrowWriterPropertiesE" title="parquet::ArrowWriterProperties"><span class="n"><span class="pre">ArrowWriterProperties</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">arrow_writer_properties</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset23ParquetFileWriteOptions23arrow_writer_propertiesE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Parquet Arrow writer properties. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileWriterE">
<span id="_CPPv3N5arrow7dataset17ParquetFileWriterE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileWriterE"></span><span id="arrow::dataset::ParquetFileWriter"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_writer"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFileWriter</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileWriterE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17ParquetFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE">
<span id="_CPPv3N5arrow7dataset17ParquetFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="_CPPv2N5arrow7dataset17ParquetFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="arrow::dataset::ParquetFileWriter::Write__std::shared_ptr:RecordBatch:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_file_writer_1a01e4be6f29331d11fe0bdb39ff0ac111"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><span class="n"><span class="pre">RecordBatch</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">batch</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17ParquetFileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Write the given batch. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset21ParquetFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset21ParquetFactoryOptionsE"></span><span id="arrow::dataset::ParquetFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_parquet_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetFactoryOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptionsE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<p>Options for making a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> from a Parquet _metadata file. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetFactoryOptions12partitioningE">
<span id="_CPPv3N5arrow7dataset21ParquetFactoryOptions12partitioningE"></span><span id="_CPPv2N5arrow7dataset21ParquetFactoryOptions12partitioningE"></span><span id="arrow::dataset::ParquetFactoryOptions::partitioning__PartitioningOrFactory"></span><span class="target" id="structarrow_1_1dataset_1_1_parquet_factory_options_1a0d841ce12972071d3330601e90cc0c6c"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset21PartitioningOrFactoryE" title="arrow::dataset::PartitioningOrFactory"><span class="n"><span class="pre">PartitioningOrFactory</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12Partitioning7DefaultEv" title="arrow::dataset::Partitioning::Default"><span class="n"><span class="pre">Default</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptions12partitioningE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Either an explicit <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> or a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a> to discover one. </p>
<p>If a factory is provided, it will be used to infer a schema for partition fields based on file and directory paths then construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. The default is a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> which will yield no partition information.</p>
<p>The (explicit or discovered) partitioning will be applied to discovered files and the resulting partition information embedded in the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetFactoryOptions18partition_base_dirE">
<span id="_CPPv3N5arrow7dataset21ParquetFactoryOptions18partition_base_dirE"></span><span id="_CPPv2N5arrow7dataset21ParquetFactoryOptions18partition_base_dirE"></span><span id="arrow::dataset::ParquetFactoryOptions::partition_base_dir__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_parquet_factory_options_1a0500a2e1614da5dfefc3c16800c73956"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partition_base_dir</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptions18partition_base_dirE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>For the purposes of applying the partitioning, paths will be stripped of the partition_base_dir. </p>
<p>Files not matching the partition_base_dir prefix will be skipped for partition discovery. The ignored files will still be part of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>, but will not have partition information.</p>
<p>Example: partition_base_dir = “/dataset”;</p>
<ul class="simple">
<li><p>“/dataset/US/sales.csv” -&gt; “US/sales.csv” will be given to the partitioning</p></li>
<li><p>“/home/john/late_sales.csv” -&gt; Will be ignored for partition discovery.</p></li>
</ul>
<p>This is useful for partitionings which parse directories when ordering is important, e.g. <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetFactoryOptions27validate_column_chunk_pathsE">
<span id="_CPPv3N5arrow7dataset21ParquetFactoryOptions27validate_column_chunk_pathsE"></span><span id="_CPPv2N5arrow7dataset21ParquetFactoryOptions27validate_column_chunk_pathsE"></span><span id="arrow::dataset::ParquetFactoryOptions::validate_column_chunk_paths__b"></span><span class="target" id="structarrow_1_1dataset_1_1_parquet_factory_options_1a1bef5686124c1bf64c02f53ce783624c"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">validate_column_chunk_paths</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptions27validate_column_chunk_pathsE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Assert that all ColumnChunk paths are consistent. </p>
<p>The parquet spec allows for ColumnChunk data to be stored in multiple files, but <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">ParquetDatasetFactory</span></a> supports only a single file with all ColumnChunk data. If this flag is set, construction of a <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">ParquetDatasetFactory</span></a> will raise an error if ColumnChunk data is not resident in a single file. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetDatasetFactoryE">
<span id="_CPPv3N5arrow7dataset21ParquetDatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset21ParquetDatasetFactoryE"></span><span id="arrow::dataset::ParquetDatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ParquetDatasetFactory</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetDatasetFactoryE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;arrow/dataset/file_parquet.h&gt;</em></div>
<p>Create <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> from custom <code class="docutils literal notranslate"><span class="pre">_metadata</span></code> cache file. </p>
<p>Dask and other systems will generate a cache metadata file by concatenating the RowGroupMetaData of multiple parquet files into a single parquet file that only contains metadata and no ColumnChunk data.</p>
<p><a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">ParquetDatasetFactory</span></a> creates a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> composed of <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_file_fragment"><span class="std std-ref">ParquetFileFragment</span></a> where each fragment is pre-populated with the exact number of row groups and statistics for each column. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetDatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset21ParquetDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset21ParquetDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::ParquetDatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_dataset_factory_1a4a9bd91554ebb05e2bc0ca3dce095170"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset21ParquetDatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get the schemas of the Fragments and <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetDatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset21ParquetDatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset21ParquetDatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::ParquetDatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_dataset_factory_1a826aa3e2a23f3dc508a35d60eca053e8"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetDatasetFactory6FinishE13FinishOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given options. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetDatasetFactory4MakeERKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions">
<span id="_CPPv3N5arrow7dataset21ParquetDatasetFactory4MakeERKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions"></span><span id="_CPPv2N5arrow7dataset21ParquetDatasetFactory4MakeERKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions"></span><span id="arrow::dataset::ParquetDatasetFactory::Make__ssCR.std::shared_ptr:fs::FileSystem:.std::shared_ptr:ParquetFileFormat:.ParquetFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_dataset_factory_1ab597d3aee8ce2827fafb1dd2f1583853"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">metadata_path</span></span>, <span class="n"><span class="pre">std</span></span><span 
class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17ParquetFileFormatE" title="arrow::dataset::ParquetFileFormat"><span class="n"><span class="pre">ParquetFileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptionsE" title="arrow::dataset::ParquetFactoryOptions"><span class="n"><span class="pre">ParquetFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset21ParquetDatasetFactory4MakeERKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">ParquetDatasetFactory</span></a> from a metadata path. </p>
<p>The <code class="docutils literal notranslate"><span class="pre">metadata_path</span></code> will be read from <code class="docutils literal notranslate"><span class="pre">filesystem</span></code>. Each RowGroup contained in the metadata file will be relative to <code class="docutils literal notranslate"><span class="pre">dirname(metadata_path)</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>metadata_path</strong><strong>[in]</strong> path of the metadata parquet file </p></li>
<li><p><strong>filesystem</strong><strong>[in]</strong> from which to open/read the path </p></li>
<li><p><strong>format</strong><strong>[in]</strong> to read the file with. </p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_parquet_factory_options"><span class="std std-ref">ParquetFactoryOptions</span></a></p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21ParquetDatasetFactory4MakeERK10FileSourceRKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions">
<span id="_CPPv3N5arrow7dataset21ParquetDatasetFactory4MakeERK10FileSourceRKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions"></span><span id="_CPPv2N5arrow7dataset21ParquetDatasetFactory4MakeERK10FileSourceRKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions"></span><span id="arrow::dataset::ParquetDatasetFactory::Make__FileSourceCR.ssCR.std::shared_ptr:fs::FileSystem:.std::shared_ptr:ParquetFileFormat:.ParquetFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_parquet_dataset_factory_1a4f72472db102521aeb9ec38d4ed7965b"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">metadata</span></span>, <span 
class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">base_path</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17ParquetFileFormatE" title="arrow::dataset::ParquetFileFormat"><span class="n"><span class="pre">ParquetFileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset21ParquetFactoryOptionsE" title="arrow::dataset::ParquetFactoryOptions"><span class="n"><span class="pre">ParquetFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset21ParquetDatasetFactory4MakeERK10FileSourceRKNSt6stringENSt10shared_ptrIN2fs10FileSystemEEENSt10shared_ptrI17ParquetFileFormatEE21ParquetFactoryOptions" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_parquet_dataset_factory"><span class="std std-ref">ParquetDatasetFactory</span></a> from a metadata source. </p>
<p>Similar to the previous Make definition, but the metadata can be a <a class="reference internal" href="memory.html#classarrow_1_1_buffer"><span class="std std-ref">Buffer</span></a> and the base_path is explicit instead of inferred from the metadata path.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>metadata</strong><strong>[in]</strong> source to open the metadata parquet file from </p></li>
<li><p><strong>base_path</strong><strong>[in]</strong> used as the prefix of every parquet file referenced </p></li>
<li><p><strong>filesystem</strong><strong>[in]</strong> from which to read the files referenced. </p></li>
<li><p><strong>format</strong><strong>[in]</strong> to read the file with. </p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_parquet_factory_options"><span class="std std-ref">ParquetFactoryOptions</span></a></p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N7skyhook12RadosConnCtxE">
<span id="_CPPv3N7skyhook12RadosConnCtxE"></span><span id="_CPPv2N7skyhook12RadosConnCtxE"></span><span id="skyhook::RadosConnCtx"></span><span class="target" id="structskyhook_1_1_rados_conn_ctx"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">RadosConnCtx</span></span></span><a class="headerlink" href="#_CPPv4N7skyhook12RadosConnCtxE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;skyhook/client/file_skyhook.h&gt;</em></div>
<p>A struct to hold the parameters required for connecting to a RADOS cluster. </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N7skyhook17SkyhookFileFormatE">
<span id="_CPPv3N7skyhook17SkyhookFileFormatE"></span><span id="_CPPv2N7skyhook17SkyhookFileFormatE"></span><span id="skyhook::SkyhookFileFormat"></span><span class="target" id="classskyhook_1_1_skyhook_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">SkyhookFileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><a class="headerlink" href="#_CPPv4N7skyhook17SkyhookFileFormatE" title="Permalink to this definition">#</a><br /></dt>
<dd><div class="docutils container">
<em>#include &lt;skyhook/client/file_skyhook.h&gt;</em></div>
<p>A FileFormat implementation that offloads fragment scan operations to the Ceph OSDs. </p>
<p>For more details, see the Skyhook paper, <a class="reference external" href="https://arxiv.org/pdf/2105.09894.pdf">https://arxiv.org/pdf/2105.09894.pdf</a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK7skyhook17SkyhookFileFormat9type_nameEv">
<span id="_CPPv3NK7skyhook17SkyhookFileFormat9type_nameEv"></span><span id="_CPPv2NK7skyhook17SkyhookFileFormat9type_nameEv"></span><span id="skyhook::SkyhookFileFormat::type_nameC"></span><span class="target" id="classskyhook_1_1_skyhook_file_format_1a4aacd5bc4720cab35d57a0d5b20ae638"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK7skyhook17SkyhookFileFormat9type_nameEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK7skyhook17SkyhookFileFormat11IsSupportedERKN5arrow7dataset10FileSourceE">
<span id="_CPPv3NK7skyhook17SkyhookFileFormat11IsSupportedERKN5arrow7dataset10FileSourceE"></span><span id="_CPPv2NK7skyhook17SkyhookFileFormat11IsSupportedERKN5arrow7dataset10FileSourceE"></span><span id="skyhook::SkyhookFileFormat::IsSupported__arrow::dataset::FileSourceCRC"></span><span class="target" id="classskyhook_1_1_skyhook_file_format_1ad9fd0e6ad7557f982103148714fd6e17"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4NK7skyhook17SkyhookFileFormat11IsSupportedERKN5arrow7dataset10FileSourceE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Indicate if the FileSource is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK7skyhook17SkyhookFileFormat7InspectERKN5arrow7dataset10FileSourceE">
<span id="_CPPv3NK7skyhook17SkyhookFileFormat7InspectERKN5arrow7dataset10FileSourceE"></span><span id="_CPPv2NK7skyhook17SkyhookFileFormat7InspectERKN5arrow7dataset10FileSourceE"></span><span id="skyhook::SkyhookFileFormat::Inspect__arrow::dataset::FileSourceCRC"></span><span class="target" id="classskyhook_1_1_skyhook_file_format_1a6f0761d8ce899247d556943d3b80a8ff"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span 
class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK7skyhook17SkyhookFileFormat7InspectERKN5arrow7dataset10FileSourceE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Return the schema of the file fragment. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>source</strong><strong>[in]</strong> The source of the file fragment. </p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The schema of the file fragment. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK7skyhook17SkyhookFileFormat10MakeWriterENSt10shared_ptrIN5arrow2io12OutputStreamEEENSt10shared_ptrIN5arrow6SchemaEEENSt10shared_ptrIN5arrow7dataset16FileWriteOptionsEEEN5arrow2fs11FileLocatorE">
<span id="_CPPv3NK7skyhook17SkyhookFileFormat10MakeWriterENSt10shared_ptrIN5arrow2io12OutputStreamEEENSt10shared_ptrIN5arrow6SchemaEEENSt10shared_ptrIN5arrow7dataset16FileWriteOptionsEEEN5arrow2fs11FileLocatorE"></span><span id="_CPPv2NK7skyhook17SkyhookFileFormat10MakeWriterENSt10shared_ptrIN5arrow2io12OutputStreamEEENSt10shared_ptrIN5arrow6SchemaEEENSt10shared_ptrIN5arrow7dataset16FileWriteOptionsEEEN5arrow2fs11FileLocatorE"></span><span id="skyhook::SkyhookFileFormat::MakeWriter__std::shared_ptr:arrow::io::OutputStream:.std::shared_ptr:arrow::Schema:.std::shared_ptr:arrow::dataset::FileWriteOptions:.arrow::fs::FileLocatorC"></span><span class="target" id="classskyhook_1_1_skyhook_file_format_1a6472a988b3f7bb5b5e40aa6c687107fb"></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span 
class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span class="n"><span class="pre">OutputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span 
class="pre">options</span></span>, <span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileLocator</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination_locator</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK7skyhook17SkyhookFileFormat10MakeWriterENSt10shared_ptrIN5arrow2io12OutputStreamEEENSt10shared_ptrIN5arrow6SchemaEEENSt10shared_ptrIN5arrow7dataset16FileWriteOptionsEEEN5arrow2fs11FileLocatorE" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N7skyhook17SkyhookFileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N7skyhook17SkyhookFileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N7skyhook17SkyhookFileFormat19DefaultWriteOptionsEv"></span><span id="skyhook::SkyhookFileFormat::DefaultWriteOptions"></span><span class="target" id="classskyhook_1_1_skyhook_file_format_1a96aee0544e82d68ab5ea25967d884a24"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N7skyhook17SkyhookFileFormat19DefaultWriteOptionsEv" title="Permalink to this definition">#</a><br /></dt>
<dd><p>Get default write options for this format. </p>
</dd></dl>
</div>
</dd></dl>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="filesystem.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Filesystems</p>
</div>
</a>
<a class="right-next"
href="../../java/index.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Java Implementation</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#interface">Interface</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#partitioning">Partitioning</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-discovery-factories">Dataset discovery/factories</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#scanning">Scanning</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#concrete-implementations">Concrete implementations</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#file-system-datasets">File System Datasets</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#file-formats">File Formats</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/api/dataset.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>