blob: c1ea836857aaffe994fce3c3f42995a5b0d8bd34 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<!-- Document metadata: encoding, viewport and page title. -->
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Dataset &#8212; Apache Arrow v4.0.1</title>
<!-- Theme stylesheets and the vendored Font Awesome icon font. -->
<link href="../../_static/css/theme.css" rel="stylesheet" />
<link href="../../_static/css/index.c5995385ac14fb8791e8eb36b4908be2.css" rel="stylesheet" />
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<!-- Font preloads carry crossorigin because font fetches are always CORS requests. -->
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../_static/basic.css" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css" />
<!-- Preload hint for the theme script bundle. -->
<link rel="preload" as="script" href="../../_static/js/index.1c5a1a01449ed65a7b51.js">
<!-- Scripts are loaded synchronously and in this order; documentation_options.js comes first. -->
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<!-- Canonical URL, declared once (the duplicate declaration was removed). -->
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/api/dataset.html" />
<link rel="shortcut icon" href="../../_static/favicon.ico"/>
<!-- Document relationships: index, search and sequential navigation. -->
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Java Implementation" href="../../java/index.html" />
<link rel="prev" title="Filesystems" href="filesystem.html" />
<!-- The second viewport meta was removed; the declaration near the top of head is the single source. -->
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<!-- Brand link back to the documentation root. The image is the link's only content, so its alt text names the destination. -->
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/arrow.png" class="logo" alt="Apache Arrow">
</a>
<!-- Documentation search: submits the query as "q" to the search page via GET. -->
<form class="bd-search d-flex align-items-center" action="../../search.html" method="get" role="search">
<!-- Decorative magnifier icon; hidden from assistive technology because the input already carries the accessible name. -->
<i class="icon fas fa-search" aria-hidden="true"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<p class="caption">
<span class="caption-text">
Specifications and Protocols
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../format/Versioning.html">
Format Versioning and Stability
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/Columnar.html">
Arrow Columnar Format
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/Flight.html">
Arrow Flight RPC
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/Integration.html">
Integration Testing
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/CDataInterface.html">
The Arrow C data interface
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/CStreamInterface.html">
The Arrow C stream interface
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../format/Other.html">
Other Data Structures
</a>
</li>
</ul>
<p class="caption">
<span class="caption-text">
Libraries
</span>
</p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/docs/c_glib/">
C/GLib
</a>
</li>
<li class="toctree-l1 current active has-children">
<a class="reference internal" href="../index.html">
C++
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/>
<label for="toctree-checkbox-1">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l2 has-children">
<a class="reference internal" href="../getting_started.html">
User Guide
</a>
<input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/>
<label for="toctree-checkbox-2">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../overview.html">
High-Level Overview
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../conventions.html">
Conventions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../cmake.html">
Using Arrow C++ in your own project
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../memory.html">
Memory Management
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../arrays.html">
Arrays
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../datatypes.html">
Data Types
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../tables.html">
Tabular Data
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../io.html">
Input / output and filesystems
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../ipc.html">
Reading and writing the Arrow IPC format
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../parquet.html">
Reading and writing Parquet files
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../csv.html">
Reading CSV files
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../json.html">
Reading JSON files
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../dataset.html">
Tabular Datasets
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../flight.html">
Arrow Flight RPC
</a>
</li>
</ul>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../examples/index.html">
Examples
</a>
<input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/>
<label for="toctree-checkbox-3">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../examples/cmake_minimal_build.html">
Minimal build using CMake
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../examples/dataset_documentation_example.html">
Arrow Datasets example
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../examples/row_columnar_conversion.html">
Row to columnar conversion
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../examples/tuple_range_conversion.html">
std::tuple-like ranges to Arrow
</a>
</li>
</ul>
</li>
<li class="toctree-l2 current active has-children">
<a class="reference internal" href="../api.html">
API Reference
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/>
<label for="toctree-checkbox-4">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l3">
<a class="reference internal" href="support.html">
Programming Support
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="memory.html">
Memory (management)
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="datatype.html">
Data Types
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="array.html">
Arrays
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="scalar.html">
Scalars
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="builder.html">
Array Builders
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="table.html">
Two-dimensional Datasets
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="c_abi.html">
C Interfaces
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="tensor.html">
Tensors
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="utilities.html">
Utilities
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="io.html">
Input / output
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="ipc.html">
Arrow IPC
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="formats.html">
File Formats
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="cuda.html">
CUDA support
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="flight.html">
Arrow Flight RPC
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="filesystem.html">
Filesystems
</a>
</li>
<li class="toctree-l3 current active">
<a class="current reference internal" href="#">
Dataset
</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">
C#
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://godoc.org/github.com/apache/arrow/go/arrow">
Go
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../java/index.html">
Java
</a>
<input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/>
<label for="toctree-checkbox-5">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../java/vector.html">
ValueVector
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../java/vector_schema_root.html">
VectorSchemaRoot
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../java/ipc.html">
Reading/Writing IPC formats
</a>
</li>
<li class="toctree-l2">
<a class="reference external" href="https://arrow.apache.org/docs/java/reference/">
Reference (javadoc)
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/docs/js/">
JavaScript
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/julia/Arrow/README.md">
Julia
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">
MATLAB
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../python/index.html">
Python
</a>
<input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/>
<label for="toctree-checkbox-6">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../python/install.html">
Installing PyArrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/memory.html">
Memory and IO Interfaces
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/data.html">
Data Types and In-Memory Data Model
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/ipc.html">
Streaming, Serialization, and IPC
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/filesystems.html">
Filesystem Interface
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../../python/filesystems_deprecated.html">
Filesystem Interface (legacy)
</a>
<input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/>
<label for="toctree-checkbox-7">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.hdfs.connect.html">
pyarrow.hdfs.connect
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.cat.html">
pyarrow.HadoopFileSystem.cat
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.chmod.html">
pyarrow.HadoopFileSystem.chmod
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.chown.html">
pyarrow.HadoopFileSystem.chown
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.delete.html">
pyarrow.HadoopFileSystem.delete
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.df.html">
pyarrow.HadoopFileSystem.df
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.disk_usage.html">
pyarrow.HadoopFileSystem.disk_usage
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.download.html">
pyarrow.HadoopFileSystem.download
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.exists.html">
pyarrow.HadoopFileSystem.exists
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.get_capacity.html">
pyarrow.HadoopFileSystem.get_capacity
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.get_space_used.html">
pyarrow.HadoopFileSystem.get_space_used
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.info.html">
pyarrow.HadoopFileSystem.info
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.ls.html">
pyarrow.HadoopFileSystem.ls
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.mkdir.html">
pyarrow.HadoopFileSystem.mkdir
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.open.html">
pyarrow.HadoopFileSystem.open
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.rename.html">
pyarrow.HadoopFileSystem.rename
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.rm.html">
pyarrow.HadoopFileSystem.rm
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HadoopFileSystem.upload.html">
pyarrow.HadoopFileSystem.upload
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../python/generated/pyarrow.HdfsFile.html">
pyarrow.HdfsFile
</a>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/plasma.html">
The Plasma In-Memory Object Store
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/numpy.html">
NumPy Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/pandas.html">
Pandas Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/timestamps.html">
Timestamps
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/csv.html">
Reading CSV files
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/feather.html">
Feather File Format
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/json.html">
Reading JSON files
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/parquet.html">
Reading and Writing the Apache Parquet Format
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/dataset.html">
Tabular Datasets
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/cuda.html">
CUDA Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/extending_types.html">
Extending pyarrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/extending.html">
Using pyarrow from C++ and Cython Code
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../../python/api.html">
API Reference
</a>
<input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/>
<label for="toctree-checkbox-8">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/datatypes.html">
Data Types and Schemas
</a>
<input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/>
<label for="toctree-checkbox-9">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.null.html">
pyarrow.null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.bool_.html">
pyarrow.bool_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.int8.html">
pyarrow.int8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.int16.html">
pyarrow.int16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.int32.html">
pyarrow.int32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.int64.html">
pyarrow.int64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.uint8.html">
pyarrow.uint8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.uint16.html">
pyarrow.uint16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.uint32.html">
pyarrow.uint32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.uint64.html">
pyarrow.uint64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.float16.html">
pyarrow.float16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.float32.html">
pyarrow.float32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.float64.html">
pyarrow.float64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.time32.html">
pyarrow.time32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.time64.html">
pyarrow.time64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.timestamp.html">
pyarrow.timestamp
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.date32.html">
pyarrow.date32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.date64.html">
pyarrow.date64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.binary.html">
pyarrow.binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.string.html">
pyarrow.string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.utf8.html">
pyarrow.utf8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.large_binary.html">
pyarrow.large_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.large_string.html">
pyarrow.large_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.large_utf8.html">
pyarrow.large_utf8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.decimal128.html">
pyarrow.decimal128
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.list_.html">
pyarrow.list_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.large_list.html">
pyarrow.large_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.map_.html">
pyarrow.map_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.struct.html">
pyarrow.struct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dictionary.html">
pyarrow.dictionary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.field.html">
pyarrow.field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.schema.html">
pyarrow.schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.from_numpy_dtype.html">
pyarrow.from_numpy_dtype
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.DataType.html">
pyarrow.DataType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.DictionaryType.html">
pyarrow.DictionaryType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ListType.html">
pyarrow.ListType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.MapType.html">
pyarrow.MapType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.StructType.html">
pyarrow.StructType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UnionType.html">
pyarrow.UnionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.TimestampType.html">
pyarrow.TimestampType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time32Type.html">
pyarrow.Time32Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time64Type.html">
pyarrow.Time64Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FixedSizeBinaryType.html">
pyarrow.FixedSizeBinaryType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Decimal128Type.html">
pyarrow.Decimal128Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Field.html">
pyarrow.Field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Schema.html">
pyarrow.Schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ExtensionType.html">
pyarrow.ExtensionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.PyExtensionType.html">
pyarrow.PyExtensionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.register_extension_type.html">
pyarrow.register_extension_type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.unregister_extension_type.html">
pyarrow.unregister_extension_type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_boolean.html">
pyarrow.types.is_boolean
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_integer.html">
pyarrow.types.is_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_signed_integer.html">
pyarrow.types.is_signed_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_unsigned_integer.html">
pyarrow.types.is_unsigned_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_int8.html">
pyarrow.types.is_int8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_int16.html">
pyarrow.types.is_int16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_int32.html">
pyarrow.types.is_int32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_int64.html">
pyarrow.types.is_int64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_uint8.html">
pyarrow.types.is_uint8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_uint16.html">
pyarrow.types.is_uint16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_uint32.html">
pyarrow.types.is_uint32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_uint64.html">
pyarrow.types.is_uint64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_floating.html">
pyarrow.types.is_floating
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_float16.html">
pyarrow.types.is_float16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_float32.html">
pyarrow.types.is_float32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_float64.html">
pyarrow.types.is_float64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_decimal.html">
pyarrow.types.is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_list.html">
pyarrow.types.is_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_large_list.html">
pyarrow.types.is_large_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_struct.html">
pyarrow.types.is_struct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_union.html">
pyarrow.types.is_union
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_nested.html">
pyarrow.types.is_nested
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_temporal.html">
pyarrow.types.is_temporal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_timestamp.html">
pyarrow.types.is_timestamp
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_date.html">
pyarrow.types.is_date
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_date32.html">
pyarrow.types.is_date32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_date64.html">
pyarrow.types.is_date64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_time.html">
pyarrow.types.is_time
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_time32.html">
pyarrow.types.is_time32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_time64.html">
pyarrow.types.is_time64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_null.html">
pyarrow.types.is_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_binary.html">
pyarrow.types.is_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_unicode.html">
pyarrow.types.is_unicode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_string.html">
pyarrow.types.is_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_large_binary.html">
pyarrow.types.is_large_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_large_unicode.html">
pyarrow.types.is_large_unicode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_large_string.html">
pyarrow.types.is_large_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_fixed_size_binary.html">
pyarrow.types.is_fixed_size_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_map.html">
pyarrow.types.is_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.types.is_dictionary.html">
pyarrow.types.is_dictionary
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/arrays.html">
Arrays and Scalars
</a>
<input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/>
<label for="toctree-checkbox-10">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.array.html">
pyarrow.array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.nulls.html">
pyarrow.nulls
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Array.html">
pyarrow.Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BooleanArray.html">
pyarrow.BooleanArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FloatingPointArray.html">
pyarrow.FloatingPointArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.IntegerArray.html">
pyarrow.IntegerArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int8Array.html">
pyarrow.Int8Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int16Array.html">
pyarrow.Int16Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int32Array.html">
pyarrow.Int32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int64Array.html">
pyarrow.Int64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.NullArray.html">
pyarrow.NullArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.NumericArray.html">
pyarrow.NumericArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt8Array.html">
pyarrow.UInt8Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt16Array.html">
pyarrow.UInt16Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt32Array.html">
pyarrow.UInt32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt64Array.html">
pyarrow.UInt64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BinaryArray.html">
pyarrow.BinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.StringArray.html">
pyarrow.StringArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FixedSizeBinaryArray.html">
pyarrow.FixedSizeBinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeBinaryArray.html">
pyarrow.LargeBinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeStringArray.html">
pyarrow.LargeStringArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time32Array.html">
pyarrow.Time32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time64Array.html">
pyarrow.Time64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Date32Array.html">
pyarrow.Date32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Date64Array.html">
pyarrow.Date64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.TimestampArray.html">
pyarrow.TimestampArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Decimal128Array.html">
pyarrow.Decimal128Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.DictionaryArray.html">
pyarrow.DictionaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ListArray.html">
pyarrow.ListArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeListArray.html">
pyarrow.LargeListArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.StructArray.html">
pyarrow.StructArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UnionArray.html">
pyarrow.UnionArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ExtensionArray.html">
pyarrow.ExtensionArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.scalar.html">
pyarrow.scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.NA.html">
pyarrow.NA
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Scalar.html">
pyarrow.Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BooleanScalar.html">
pyarrow.BooleanScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int8Scalar.html">
pyarrow.Int8Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int16Scalar.html">
pyarrow.Int16Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int32Scalar.html">
pyarrow.Int32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Int64Scalar.html">
pyarrow.Int64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt8Scalar.html">
pyarrow.UInt8Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt16Scalar.html">
pyarrow.UInt16Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt32Scalar.html">
pyarrow.UInt32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UInt64Scalar.html">
pyarrow.UInt64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FloatScalar.html">
pyarrow.FloatScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.DoubleScalar.html">
pyarrow.DoubleScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BinaryScalar.html">
pyarrow.BinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.StringScalar.html">
pyarrow.StringScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FixedSizeBinaryScalar.html">
pyarrow.FixedSizeBinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeBinaryScalar.html">
pyarrow.LargeBinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeStringScalar.html">
pyarrow.LargeStringScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time32Scalar.html">
pyarrow.Time32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Time64Scalar.html">
pyarrow.Time64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Date32Scalar.html">
pyarrow.Date32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Date64Scalar.html">
pyarrow.Date64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.TimestampScalar.html">
pyarrow.TimestampScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Decimal128Scalar.html">
pyarrow.Decimal128Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.DictionaryScalar.html">
pyarrow.DictionaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ListScalar.html">
pyarrow.ListScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LargeListScalar.html">
pyarrow.LargeListScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.StructScalar.html">
pyarrow.StructScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.UnionScalar.html">
pyarrow.UnionScalar
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/memory.html">
Buffers and Memory
</a>
<!-- Toggle control for the "Buffers and Memory" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox" aria-label="Toggle Buffers and Memory section"/>
<label for="toctree-checkbox-11">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.allocate_buffer.html">
pyarrow.allocate_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.py_buffer.html">
pyarrow.py_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.foreign_buffer.html">
pyarrow.foreign_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Buffer.html">
pyarrow.Buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ResizableBuffer.html">
pyarrow.ResizableBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compress.html">
pyarrow.compress
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.decompress.html">
pyarrow.decompress
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.MemoryPool.html">
pyarrow.MemoryPool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.default_memory_pool.html">
pyarrow.default_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.jemalloc_memory_pool.html">
pyarrow.jemalloc_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.mimalloc_memory_pool.html">
pyarrow.mimalloc_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.system_memory_pool.html">
pyarrow.system_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.jemalloc_set_decay_ms.html">
pyarrow.jemalloc_set_decay_ms
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.set_memory_pool.html">
pyarrow.set_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.log_memory_allocations.html">
pyarrow.log_memory_allocations
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.total_allocated_bytes.html">
pyarrow.total_allocated_bytes
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/compute.html">
Compute Functions
</a>
<!-- Toggle control for the "Compute Functions" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox" aria-label="Toggle Compute Functions section"/>
<label for="toctree-checkbox-12">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.count.html">
pyarrow.compute.count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.mean.html">
pyarrow.compute.mean
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.min_max.html">
pyarrow.compute.min_max
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.mode.html">
pyarrow.compute.mode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.stddev.html">
pyarrow.compute.stddev
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.sum.html">
pyarrow.compute.sum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.variance.html">
pyarrow.compute.variance
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.add.html">
pyarrow.compute.add
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.add_checked.html">
pyarrow.compute.add_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.divide.html">
pyarrow.compute.divide
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.divide_checked.html">
pyarrow.compute.divide_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.multiply.html">
pyarrow.compute.multiply
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.multiply_checked.html">
pyarrow.compute.multiply_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.subtract.html">
pyarrow.compute.subtract
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.subtract_checked.html">
pyarrow.compute.subtract_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.power.html">
pyarrow.compute.power
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.power_checked.html">
pyarrow.compute.power_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.equal.html">
pyarrow.compute.equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.greater.html">
pyarrow.compute.greater
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.greater_equal.html">
pyarrow.compute.greater_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.less.html">
pyarrow.compute.less
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.less_equal.html">
pyarrow.compute.less_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.not_equal.html">
pyarrow.compute.not_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.and_.html">
pyarrow.compute.and_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.and_kleene.html">
pyarrow.compute.and_kleene
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.all.html">
pyarrow.compute.all
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.any.html">
pyarrow.compute.any
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.invert.html">
pyarrow.compute.invert
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.or_.html">
pyarrow.compute.or_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.or_kleene.html">
pyarrow.compute.or_kleene
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.xor.html">
pyarrow.compute.xor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_alnum.html">
pyarrow.compute.ascii_is_alnum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_alpha.html">
pyarrow.compute.ascii_is_alpha
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_decimal.html">
pyarrow.compute.ascii_is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_lower.html">
pyarrow.compute.ascii_is_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_printable.html">
pyarrow.compute.ascii_is_printable
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_space.html">
pyarrow.compute.ascii_is_space
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_upper.html">
pyarrow.compute.ascii_is_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_alnum.html">
pyarrow.compute.utf8_is_alnum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_alpha.html">
pyarrow.compute.utf8_is_alpha
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_decimal.html">
pyarrow.compute.utf8_is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_digit.html">
pyarrow.compute.utf8_is_digit
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_lower.html">
pyarrow.compute.utf8_is_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_numeric.html">
pyarrow.compute.utf8_is_numeric
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_printable.html">
pyarrow.compute.utf8_is_printable
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_space.html">
pyarrow.compute.utf8_is_space
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_upper.html">
pyarrow.compute.utf8_is_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_is_title.html">
pyarrow.compute.ascii_is_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_is_title.html">
pyarrow.compute.utf8_is_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.string_is_ascii.html">
pyarrow.compute.string_is_ascii
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_lower.html">
pyarrow.compute.ascii_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.ascii_upper.html">
pyarrow.compute.ascii_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_lower.html">
pyarrow.compute.utf8_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.utf8_upper.html">
pyarrow.compute.utf8_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.index_in.html">
pyarrow.compute.index_in
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.is_in.html">
pyarrow.compute.is_in
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.match_substring.html">
pyarrow.compute.match_substring
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.match_substring_regex.html">
pyarrow.compute.match_substring_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.cast.html">
pyarrow.compute.cast
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.strptime.html">
pyarrow.compute.strptime
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.filter.html">
pyarrow.compute.filter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.take.html">
pyarrow.compute.take
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.dictionary_encode.html">
pyarrow.compute.dictionary_encode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.unique.html">
pyarrow.compute.unique
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.value_counts.html">
pyarrow.compute.value_counts
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.partition_nth_indices.html">
pyarrow.compute.partition_nth_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.sort_indices.html">
pyarrow.compute.sort_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.binary_length.html">
pyarrow.compute.binary_length
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.fill_null.html">
pyarrow.compute.fill_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.is_null.html">
pyarrow.compute.is_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.is_valid.html">
pyarrow.compute.is_valid
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.list_value_length.html">
pyarrow.compute.list_value_length
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.list_flatten.html">
pyarrow.compute.list_flatten
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.compute.list_parent_indices.html">
pyarrow.compute.list_parent_indices
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/files.html">
Streams and File Access
</a>
<!-- Toggle control for the "Streams and File Access" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox" aria-label="Toggle Streams and File Access section"/>
<label for="toctree-checkbox-13">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.input_stream.html">
pyarrow.input_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.output_stream.html">
pyarrow.output_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.memory_map.html">
pyarrow.memory_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.create_memory_map.html">
pyarrow.create_memory_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.NativeFile.html">
pyarrow.NativeFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.OSFile.html">
pyarrow.OSFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.PythonFile.html">
pyarrow.PythonFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BufferReader.html">
pyarrow.BufferReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.BufferOutputStream.html">
pyarrow.BufferOutputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.FixedSizeBufferWriter.html">
pyarrow.FixedSizeBufferWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.MemoryMappedFile.html">
pyarrow.MemoryMappedFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.CompressedInputStream.html">
pyarrow.CompressedInputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.CompressedOutputStream.html">
pyarrow.CompressedOutputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.hdfs.connect.html">
pyarrow.hdfs.connect
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.LocalFileSystem.html">
pyarrow.LocalFileSystem
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/tables.html">
Tables and Tensors
</a>
<!-- Toggle control for the "Tables and Tensors" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox" aria-label="Toggle Tables and Tensors section"/>
<label for="toctree-checkbox-14">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.chunked_array.html">
pyarrow.chunked_array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.concat_arrays.html">
pyarrow.concat_arrays
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.concat_tables.html">
pyarrow.concat_tables
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.record_batch.html">
pyarrow.record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.table.html">
pyarrow.table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ChunkedArray.html">
pyarrow.ChunkedArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.RecordBatch.html">
pyarrow.RecordBatch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Table.html">
pyarrow.Table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.Tensor.html">
pyarrow.Tensor
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/ipc.html">
Serialization and IPC
</a>
<!-- Toggle control for the "Serialization and IPC" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox" aria-label="Toggle Serialization and IPC section"/>
<label for="toctree-checkbox-15">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.new_file.html">
pyarrow.ipc.new_file
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.open_file.html">
pyarrow.ipc.open_file
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.new_stream.html">
pyarrow.ipc.new_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.open_stream.html">
pyarrow.ipc.open_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.read_message.html">
pyarrow.ipc.read_message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.read_record_batch.html">
pyarrow.ipc.read_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.get_record_batch_size.html">
pyarrow.ipc.get_record_batch_size
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.read_tensor.html">
pyarrow.ipc.read_tensor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.write_tensor.html">
pyarrow.ipc.write_tensor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.get_tensor_size.html">
pyarrow.ipc.get_tensor_size
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.Message.html">
pyarrow.ipc.Message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.MessageReader.html">
pyarrow.ipc.MessageReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.RecordBatchFileReader.html">
pyarrow.ipc.RecordBatchFileReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.RecordBatchFileWriter.html">
pyarrow.ipc.RecordBatchFileWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.RecordBatchStreamReader.html">
pyarrow.ipc.RecordBatchStreamReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.ipc.RecordBatchStreamWriter.html">
pyarrow.ipc.RecordBatchStreamWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.serialize.html">
pyarrow.serialize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.serialize_to.html">
pyarrow.serialize_to
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.deserialize.html">
pyarrow.deserialize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.deserialize_components.html">
pyarrow.deserialize_components
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.deserialize_from.html">
pyarrow.deserialize_from
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.read_serialized.html">
pyarrow.read_serialized
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.SerializedPyObject.html">
pyarrow.SerializedPyObject
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.SerializationContext.html">
pyarrow.SerializationContext
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/flight.html">
Arrow Flight
</a>
<!-- Toggle control for the "Arrow Flight" sub-list that follows.
     The label holds only a chevron icon, so the checkbox carries an explicit
     accessible name and the icon is hidden from assistive technology.
     NOTE(review): presumably theme CSS shows/hides the sibling list from this checkbox; confirm styling is unaffected. -->
<input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox" aria-label="Toggle Arrow Flight section"/>
<label for="toctree-checkbox-16">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.Action.html">
pyarrow.flight.Action
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ActionType.html">
pyarrow.flight.ActionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.DescriptorType.html">
pyarrow.flight.DescriptorType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightDescriptor.html">
pyarrow.flight.FlightDescriptor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightEndpoint.html">
pyarrow.flight.FlightEndpoint
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightInfo.html">
pyarrow.flight.FlightInfo
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.Location.html">
pyarrow.flight.Location
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.Ticket.html">
pyarrow.flight.Ticket
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.Result.html">
pyarrow.flight.Result
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightCallOptions.html">
pyarrow.flight.FlightCallOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightClient.html">
pyarrow.flight.FlightClient
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ClientMiddlewareFactory.html">
pyarrow.flight.ClientMiddlewareFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ClientMiddleware.html">
pyarrow.flight.ClientMiddleware
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightServerBase.html">
pyarrow.flight.FlightServerBase
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.GeneratorStream.html">
pyarrow.flight.GeneratorStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.RecordBatchStream.html">
pyarrow.flight.RecordBatchStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ServerMiddlewareFactory.html">
pyarrow.flight.ServerMiddlewareFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ServerMiddleware.html">
pyarrow.flight.ServerMiddleware
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ClientAuthHandler.html">
pyarrow.flight.ClientAuthHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.ServerAuthHandler.html">
pyarrow.flight.ServerAuthHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.FlightMethod.html">
pyarrow.flight.FlightMethod
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.flight.CallInfo.html">
pyarrow.flight.CallInfo
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/formats.html">
Tabular File Formats
</a>
<input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox"/>
<label for="toctree-checkbox-17">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.ReadOptions.html">
pyarrow.csv.ReadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.ParseOptions.html">
pyarrow.csv.ParseOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.ConvertOptions.html">
pyarrow.csv.ConvertOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.read_csv.html">
pyarrow.csv.read_csv
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.open_csv.html">
pyarrow.csv.open_csv
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.csv.CSVStreamingReader.html">
pyarrow.csv.CSVStreamingReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.feather.read_feather.html">
pyarrow.feather.read_feather
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.feather.read_table.html">
pyarrow.feather.read_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.feather.write_feather.html">
pyarrow.feather.write_feather
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.json.ReadOptions.html">
pyarrow.json.ReadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.json.ParseOptions.html">
pyarrow.json.ParseOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.json.read_json.html">
pyarrow.json.read_json
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.ParquetDataset.html">
pyarrow.parquet.ParquetDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.ParquetFile.html">
pyarrow.parquet.ParquetFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.ParquetWriter.html">
pyarrow.parquet.ParquetWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.read_table.html">
pyarrow.parquet.read_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.read_metadata.html">
pyarrow.parquet.read_metadata
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.read_pandas.html">
pyarrow.parquet.read_pandas
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.read_schema.html">
pyarrow.parquet.read_schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.write_metadata.html">
pyarrow.parquet.write_metadata
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.write_table.html">
pyarrow.parquet.write_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.parquet.write_to_dataset.html">
pyarrow.parquet.write_to_dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.orc.ORCFile.html">
pyarrow.orc.ORCFile
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/filesystems.html">
Filesystems
</a>
<input class="toctree-checkbox" id="toctree-checkbox-18" name="toctree-checkbox-18" type="checkbox"/>
<label for="toctree-checkbox-18">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.FileInfo.html">
pyarrow.fs.FileInfo
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.FileSelector.html">
pyarrow.fs.FileSelector
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.FileSystem.html">
pyarrow.fs.FileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.LocalFileSystem.html">
pyarrow.fs.LocalFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.S3FileSystem.html">
pyarrow.fs.S3FileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.HadoopFileSystem.html">
pyarrow.fs.HadoopFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.SubTreeFileSystem.html">
pyarrow.fs.SubTreeFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.PyFileSystem.html">
pyarrow.fs.PyFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.FileSystemHandler.html">
pyarrow.fs.FileSystemHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.fs.FSSpecHandler.html">
pyarrow.fs.FSSpecHandler
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/dataset.html">
Dataset
</a>
<input class="toctree-checkbox" id="toctree-checkbox-19" name="toctree-checkbox-19" type="checkbox"/>
<label for="toctree-checkbox-19">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.dataset.html">
pyarrow.dataset.dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.parquet_dataset.html">
pyarrow.dataset.parquet_dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.partitioning.html">
pyarrow.dataset.partitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.field.html">
pyarrow.dataset.field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.scalar.html">
pyarrow.dataset.scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.FileFormat.html">
pyarrow.dataset.FileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.ParquetFileFormat.html">
pyarrow.dataset.ParquetFileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.Partitioning.html">
pyarrow.dataset.Partitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.PartitioningFactory.html">
pyarrow.dataset.PartitioningFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.DirectoryPartitioning.html">
pyarrow.dataset.DirectoryPartitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.HivePartitioning.html">
pyarrow.dataset.HivePartitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.Dataset.html">
pyarrow.dataset.Dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.FileSystemDataset.html">
pyarrow.dataset.FileSystemDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.FileSystemFactoryOptions.html">
pyarrow.dataset.FileSystemFactoryOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.FileSystemDatasetFactory.html">
pyarrow.dataset.FileSystemDatasetFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.UnionDataset.html">
pyarrow.dataset.UnionDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.Scanner.html">
pyarrow.dataset.Scanner
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.dataset.Expression.html">
pyarrow.dataset.Expression
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/plasma.html">
Plasma In-Memory Object Store
</a>
<input class="toctree-checkbox" id="toctree-checkbox-20" name="toctree-checkbox-20" type="checkbox"/>
<label for="toctree-checkbox-20">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.plasma.ObjectID.html">
pyarrow.plasma.ObjectID
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.plasma.PlasmaClient.html">
pyarrow.plasma.PlasmaClient
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.plasma.PlasmaBuffer.html">
pyarrow.plasma.PlasmaBuffer
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/cuda.html">
CUDA Integration
</a>
<input class="toctree-checkbox" id="toctree-checkbox-21" name="toctree-checkbox-21" type="checkbox"/>
<label for="toctree-checkbox-21">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.Context.html">
pyarrow.cuda.Context
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.CudaBuffer.html">
pyarrow.cuda.CudaBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.new_host_buffer.html">
pyarrow.cuda.new_host_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.HostBuffer.html">
pyarrow.cuda.HostBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.BufferReader.html">
pyarrow.cuda.BufferReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.BufferWriter.html">
pyarrow.cuda.BufferWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.serialize_record_batch.html">
pyarrow.cuda.serialize_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.read_record_batch.html">
pyarrow.cuda.read_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.read_message.html">
pyarrow.cuda.read_message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cuda.IpcMemHandle.html">
pyarrow.cuda.IpcMemHandle
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../python/api/misc.html">
Miscellaneous
</a>
<input class="toctree-checkbox" id="toctree-checkbox-22" name="toctree-checkbox-22" type="checkbox"/>
<label for="toctree-checkbox-22">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.cpu_count.html">
pyarrow.cpu_count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.set_cpu_count.html">
pyarrow.set_cpu_count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.get_include.html">
pyarrow.get_include
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.get_libraries.html">
pyarrow.get_libraries
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../python/generated/pyarrow.get_library_dirs.html">
pyarrow.get_library_dirs
</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/getting_involved.html">
Getting Involved
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../python/benchmarks.html">
Benchmarks
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/docs/r/">
R
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">
Ruby
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
</ul>
<p class="caption">
<span class="caption-text">
Development
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../developers/contributing.html">
Contributing to Apache Arrow
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../developers/cpp/index.html">
C++ Development
</a>
<input class="toctree-checkbox" id="toctree-checkbox-23" name="toctree-checkbox-23" type="checkbox"/>
<label for="toctree-checkbox-23">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../developers/cpp/building.html">
Building Arrow C++
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../developers/cpp/development.html">
Development Guidelines
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../developers/cpp/windows.html">
Developing on Windows
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../developers/cpp/conventions.html">
Conventions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../developers/cpp/fuzzing.html">
Fuzzing Arrow C++
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/python.html">
Python Development
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/archery.html">
Daily Development using Archery
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/crossbow.html">
Packaging and Testing with Crossbow
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/docker.html">
Running Docker Builds
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/benchmarks.html">
Benchmarks
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../developers/documentation.html">
Building the Documentation
</a>
</li>
</ul>
</div>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#interface">
Interface
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#partitioning">
Partitioning
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#dataset-discovery-factories">
Dataset discovery/factories
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#scanning">
Scanning
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#concrete-implementations">
Concrete implementations
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#file-system-datasets">
File System Datasets
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#file-formats">
File Formats
</a>
</li>
</ul>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="dataset">
<h1>Dataset<a class="headerlink" href="#dataset" title="Permalink to this headline"></a></h1>
<div class="section" id="interface">
<h2>Interface<a class="headerlink" href="#interface" title="Permalink to this headline"></a></h2>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8FragmentE">
<span id="_CPPv3N5arrow7dataset8FragmentE"></span><span id="_CPPv2N5arrow7dataset8FragmentE"></span><span id="arrow::dataset::Fragment"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">Fragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8FragmentE" title="Permalink to this definition"></a><br /></dt>
<dd><p>A granular piece of a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>, such as an individual file. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> can be read/scanned separately from other fragments. It yields a collection of RecordBatches when scanned, encapsulated in one or more ScanTasks.</p>
<p>Note that Fragments have well-defined physical schemas which are reconciled by the Datasets which contain them; these physical schemas may differ from a parent <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s schema and the physical schemas of sibling Fragments. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">arrow::dataset::FileFragment</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_in_memory_fragment"><span class="std std-ref">arrow::dataset::InMemoryFragment</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment18ReadPhysicalSchemaEv">
<span id="_CPPv3N5arrow7dataset8Fragment18ReadPhysicalSchemaEv"></span><span id="_CPPv2N5arrow7dataset8Fragment18ReadPhysicalSchemaEv"></span><span id="arrow::dataset::Fragment::ReadPhysicalSchema"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1adbc8153a7053d4f88c88e921390e006d"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReadPhysicalSchema</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment18ReadPhysicalSchemaEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the physical schema of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
<p>The physical schema is also called the writer schema. This method is blocking and may suffer from high filesystem latency. The schema is cached after being read once, or may be specified at construction. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment4ScanENSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset8Fragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset8Fragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::Fragment::Scan__std::shared_ptr:ScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a520471fc73da0f6920c34110ddd52c5d"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ScanTaskIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment4ScanENSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Scan returns an iterator of ScanTasks, each of which yields RecordBatches from this <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
<p>Note that batches yielded using this method will not be filtered and may not align with the <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>’s schema. In particular, note that columns referenced by the filter may be present in yielded batches even if they are not projected (so that they are available when a filter is applied). Additionally, explicitly projected columns may be absent if they were not present in this fragment.</p>
<p>To receive a record batch stream which is fully filtered and projected, use <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::Fragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a216bcd9c413a8ac0faecced93da5ce14"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8Fragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset8Fragment20partition_expressionEv">
<span id="_CPPv3NK5arrow7dataset8Fragment20partition_expressionEv"></span><span id="_CPPv2NK5arrow7dataset8Fragment20partition_expressionEv"></span><span id="arrow::dataset::Fragment::partition_expressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_1a5a5533735282505fafec0d992c26a77f"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partition_expression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset8Fragment20partition_expressionEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>An expression which evaluates to true for all data viewed by this <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7DatasetE">
<span id="_CPPv3N5arrow7dataset7DatasetE"></span><span id="_CPPv2N5arrow7dataset7DatasetE"></span><span id="arrow::dataset::Dataset"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">Dataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7DatasetE" title="Permalink to this definition"></a><br /></dt>
<dd><p>A container of zero or more Fragments. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> acts as a union of Fragments, e.g. files deeply nested in a directory. A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> has a schema to which Fragments must align during a scan operation. This is analogous to Avro’s reader and writer schema. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">arrow::dataset::FileSystemDataset</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_in_memory_dataset"><span class="std std-ref">arrow::dataset::InMemoryDataset</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">arrow::dataset::UnionDataset</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset7NewScanENSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset7Dataset7NewScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset7Dataset7NewScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::Dataset::NewScan__std::shared_ptr:ScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a4abc9668cacbe13d8a8a2ca62338672d"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14ScannerBuilderE" title="arrow::dataset::ScannerBuilder"><span class="n"><span class="pre">ScannerBuilder</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">NewScan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset7NewScanENSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Begin to build a new Scan operation against this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Dataset12GetFragmentsE10Expression">
<span id="_CPPv3N5arrow7dataset7Dataset12GetFragmentsE10Expression"></span><span id="_CPPv2N5arrow7dataset7Dataset12GetFragmentsE10Expression"></span><span id="arrow::dataset::Dataset::GetFragments__Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1a766c4381c0d1b65ed3a8edae74d722e4"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">FragmentIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetFragments</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">predicate</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Dataset12GetFragmentsE10Expression" title="Permalink to this definition"></a><br /></dt>
<dd><p>GetFragments returns an iterator of Fragments given a predicate. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset20partition_expressionEv">
<span id="_CPPv3NK5arrow7dataset7Dataset20partition_expressionEv"></span><span id="_CPPv2NK5arrow7dataset7Dataset20partition_expressionEv"></span><span id="arrow::dataset::Dataset::partition_expressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1ace4245cead619a5cb6956e4f71bc51f5"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partition_expression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset20partition_expressionEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>An expression which evaluates to true for all data viewed by this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
<p>May be null, which indicates no information is available. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset7Dataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset7Dataset9type_nameEv"></span><span id="arrow::dataset::Dataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1ac7611e16eb019f295612a9ef428fdfd2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::Dataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_1aa615b8333c7ac0e5553adbc9877cb44a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span 
class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Dataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
</div>
</dd></dl>
</div>
<div class="section" id="partitioning">
<h2>Partitioning<a class="headerlink" href="#partitioning" title="Permalink to this headline"></a></h2>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv424kDefaultHiveNullFallback">
<span id="_CPPv324kDefaultHiveNullFallback"></span><span id="_CPPv224kDefaultHiveNullFallback"></span><span id="kDefaultHiveNullFallback__cA"></span><span class="target" id="group__dataset-partitioning_1ga748eb813cc9d96f51d5a0f534bdf26a1"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">char</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultHiveNullFallback</span></span></span><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;__HIVE_DEFAULT_PARTITION__&quot;</span></span><a class="headerlink" href="#_CPPv424kDefaultHiveNullFallback" title="Permalink to this definition"></a><br /></dt>
<dd><p>The default fallback used for null values in a Hive-style partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6stringERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6stringERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6stringERKNSt6stringE"></span><span id="StripPrefixAndFilename__ssCR.ssCR"></span><span class="target" id="group__dataset-partitioning_1ga24e40e5fc8b7f84d097f8a0bcb3d77e0"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv422StripPrefixAndFilenameRKNSt6stringERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Remove a prefix and the filename of a path. </p>
<p>e.g., <code class="docutils literal notranslate"><span class="pre">StripPrefixAndFilename(&quot;/data/year=2019/c.txt&quot;,</span> <span class="pre">&quot;/data&quot;)</span> <span class="pre">-&gt;</span> <span class="pre">&quot;year=2019&quot;</span></code> </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE"></span><span id="StripPrefixAndFilename__std::vector:ss:CR.ssCR"></span><span class="target" id="group__dataset-partitioning_1gabfa1200ecfebac5bdff1239aa1eb8270"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv422StripPrefixAndFilenameRKNSt6vectorINSt6stringEEERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Vector version of StripPrefixAndFilename. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv422StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE">
<span id="_CPPv322StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE"></span><span id="_CPPv222StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE"></span><span id="StripPrefixAndFilename__std::vector:fs::FileInfo:CR.ssCR"></span><span class="target" id="group__dataset-partitioning_1ga4f897ffe4c649c15c704853fefd5d60c"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">StripPrefixAndFilename</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">FileInfo</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">files</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">prefix</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv422StripPrefixAndFilenameRKNSt6vectorIN2fs8FileInfoEEERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Vector version of StripPrefixAndFilename. </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12PartitioningE">
<span id="_CPPv3N5arrow7dataset12PartitioningE"></span><span id="_CPPv2N5arrow7dataset12PartitioningE"></span><span id="arrow::dataset::Partitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">Partitioning</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12PartitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Interface for parsing partition expressions from string partition identifiers. </p>
<p>For example, the identifier “foo=5” might be parsed to an equality expression between the “foo” field and the value 5.</p>
<p>Some partitionings may store the field names in a metadata store instead of in file paths; for example, dataset_root/2009/11/… could be used when the partition fields are “year” and “month”.</p>
<p>Paths are consumed from left to right. Paths must be relative to the root of a partition; path prefixes must be removed before passing the path to a partitioning for parsing. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_function_partitioning"><span class="std std-ref">arrow::dataset::FunctionPartitioning</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_key_value_partitioning"><span class="std std-ref">arrow::dataset::KeyValuePartitioning</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions-arrow-dataset-partitioning">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12Partitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset12Partitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset12Partitioning9type_nameEv"></span><span id="arrow::dataset::Partitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1acb1a6614ca4a3e55a2e56d491b80e58b"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12Partitioning9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12Partitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset12Partitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset12Partitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::Partitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1ab926955c36fb6d2691028c8121fdd8fc"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Expression</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12Partitioning5ParseERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12Partitioning6schemaEv">
<span id="_CPPv3N5arrow7dataset12Partitioning6schemaEv"></span><span id="_CPPv2N5arrow7dataset12Partitioning6schemaEv"></span><span id="arrow::dataset::Partitioning::schema"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1ad0d9beac97c3f9703adbaa6c6a434c50"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset12Partitioning6schemaEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The partition schema. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12Partitioning7DefaultEv">
<span id="_CPPv3N5arrow7dataset12Partitioning7DefaultEv"></span><span id="_CPPv2N5arrow7dataset12Partitioning7DefaultEv"></span><span id="arrow::dataset::Partitioning::Default"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_1a157ec7c73980998eda65bb012ad6aaf2"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Default</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset12Partitioning7DefaultEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>A default <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> which always yields scalar(true). </p>
</dd></dl>
</div>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12Partitioning18PartitionedBatchesE">
<span id="_CPPv3N5arrow7dataset12Partitioning18PartitionedBatchesE"></span><span id="_CPPv2N5arrow7dataset12Partitioning18PartitionedBatchesE"></span><span id="arrow::dataset::Partitioning::PartitionedBatches"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_1_1_partitioned_batches"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">PartitionedBatches</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12Partitioning18PartitionedBatchesE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>If the input batch shares any fields with this partitioning, produce sub-batches which satisfy mutually exclusive Expressions. </p>
</dd></dl>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptionsE"></span><span id="arrow::dataset::PartitioningFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Options for inferring a partitioning. </p>
<p>Subclassed by <a class="reference internal" href="#structarrow_1_1dataset_1_1_hive_partitioning_factory_options"><span class="std std-ref">arrow::dataset::HivePartitioningFactoryOptions</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE"></span><span id="arrow::dataset::PartitioningFactoryOptions::infer_dictionary__b"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options_1a113c844746abc258034eb0edb6dd1da6"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">infer_dictionary</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptions16infer_dictionaryE" title="Permalink to this definition"></a><br /></dt>
<dd><p>When inferring a schema for partition fields, yield dictionary-encoded types instead of plain ones. </p>
<p>This can be more efficient when materializing virtual columns, and Expressions parsed by the finished <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> will include dictionaries of all unique inspected values for each field. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset26PartitioningFactoryOptions6schemaE">
<span id="_CPPv3N5arrow7dataset26PartitioningFactoryOptions6schemaE"></span><span id="_CPPv2N5arrow7dataset26PartitioningFactoryOptions6schemaE"></span><span id="arrow::dataset::PartitioningFactoryOptions::schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_partitioning_factory_options_1a1078577bc1b2126823c136735a8387b0"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptions6schemaE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Optionally, an expected schema can be provided, in which case inference will only check discovered fields against the schema and update internal state (such as dictionaries). </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset30HivePartitioningFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset30HivePartitioningFactoryOptionsE"></span><span id="arrow::dataset::HivePartitioningFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_hive_partitioning_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioningFactoryOptions</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="arrow::dataset::PartitioningFactoryOptions"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Options for inferring a hive-style partitioning. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE">
<span id="_CPPv3N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE"></span><span id="_CPPv2N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE"></span><span id="arrow::dataset::HivePartitioningFactoryOptions::null_fallback__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_hive_partitioning_factory_options_1a8e48cce66a027ba832654f1593be8b3e"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">null_fallback</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptions13null_fallbackE" title="Permalink to this definition"></a><br /></dt>
<dd><p>The hive partitioning scheme maps null to a hard-coded fallback string. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19PartitioningFactoryE">
<span id="_CPPv3N5arrow7dataset19PartitioningFactoryE"></span><span id="_CPPv2N5arrow7dataset19PartitioningFactoryE"></span><span id="arrow::dataset::PartitioningFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a> provides creation of a partitioning when the specific schema must be inferred from available paths (no explicit schema is known). </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19PartitioningFactory9type_nameEv">
<span id="_CPPv3NK5arrow7dataset19PartitioningFactory9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset19PartitioningFactory9type_nameEv"></span><span id="arrow::dataset::PartitioningFactory::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1afd3b563fc589b700ef23d2726ac6d518"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19PartitioningFactory9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::PartitioningFactory::Inspect__std::vector:ss:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1a14c6a93085e2b05aadac45d36d7ea483"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span><span class="sig-paren">)</span><span class="w"> </span><span 
class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19PartitioningFactory7InspectERKNSt6vectorINSt6stringEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the schema for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
<p>This may reset internal state, for example dictionaries of unique representations. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::PartitioningFactory::Finish__std::shared_ptr:Schema:CRC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_factory_1a5b601acefcfda6236dad628bbce072fc"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span 
class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19PartitioningFactory6FinishERKNSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a partitioning using the provided schema (fields may be dropped). </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20KeyValuePartitioningE">
<span id="_CPPv3N5arrow7dataset20KeyValuePartitioningE"></span><span id="_CPPv2N5arrow7dataset20KeyValuePartitioningE"></span><span id="arrow::dataset::KeyValuePartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_key_value_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">KeyValuePartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Subclass for the common case of a partitioning which yields an equality expression for each segment. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">arrow::dataset::DirectoryPartitioning</span></a>, <a class="reference internal" href="#classarrow_1_1dataset_1_1_hive_partitioning"><span class="std std-ref">arrow::dataset::HivePartitioning</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::KeyValuePartitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_key_value_partitioning_1a2747e2ef3c8e39dfca19d71b2892a1ee"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Expression</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20KeyValuePartitioning5ParseERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
</div>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20KeyValuePartitioning3KeyE">
<span id="_CPPv3N5arrow7dataset20KeyValuePartitioning3KeyE"></span><span id="_CPPv2N5arrow7dataset20KeyValuePartitioning3KeyE"></span><span id="arrow::dataset::KeyValuePartitioning::Key"></span><span class="target" id="structarrow_1_1dataset_1_1_key_value_partitioning_1_1_key"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Key</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset20KeyValuePartitioning3KeyE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>An unconverted equality expression consisting of a field name and the representation of a scalar value. </p>
</dd></dl>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioningE">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioningE"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioningE"></span><span id="arrow::dataset::DirectoryPartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">DirectoryPartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="arrow::dataset::KeyValuePartitioning"><span class="n"><span class="pre">KeyValuePartitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a> parses one segment of a path for each field in its schema. </p>
<p>All fields are required, so paths passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_key_value_partitioning_1a2747e2ef3c8e39dfca19d71b2892a1ee"><span class="std std-ref">DirectoryPartitioning::Parse</span></a> must contain segments for each field.</p>
<p>For example given schema&lt;year:int16, month:int8&gt; the path “/2009/11” would be parsed to (“year”_ == 2009 and “month”_ == 11) </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector"></span><span id="arrow::dataset::DirectoryPartitioning::DirectoryPartitioning__std::shared_ptr:Schema:.ArrayVector"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1a057d47768271c20e1ad36ea8272af8c9"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DirectoryPartitioning</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">ArrayVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">dictionaries</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioning21DirectoryPartitioningENSt10shared_ptrI6SchemaEE11ArrayVector" title="Permalink to this definition"></a><br /></dt>
<dd><p>If a field in schema is of dictionary type, the corresponding element of dictionaries must contain the dictionary of values for that field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21DirectoryPartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset21DirectoryPartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset21DirectoryPartitioning9type_nameEv"></span><span id="arrow::dataset::DirectoryPartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1ad56559bf0912952469c2f7bab7365a1d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21DirectoryPartitioning9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions">
<span id="_CPPv3N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="_CPPv2N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions"></span><span id="arrow::dataset::DirectoryPartitioning::MakeFactory__std::vector:ss:.PartitioningFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_directory_partitioning_1af6105a7c349a4e87dd3646ffe1e8c275"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFactory</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">field_names</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset26PartitioningFactoryOptionsE" title="arrow::dataset::PartitioningFactoryOptions"><span class="n"><span class="pre">PartitioningFactoryOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> 
</span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset21DirectoryPartitioning11MakeFactoryENSt6vectorINSt6stringEEE26PartitioningFactoryOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a factory for a directory partitioning. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>field_names</strong> <strong>[in]</strong> – The names for the partition fields. Types will be inferred. </p>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioningE">
<span id="_CPPv3N5arrow7dataset16HivePartitioningE"></span><span id="_CPPv2N5arrow7dataset16HivePartitioningE"></span><span id="arrow::dataset::HivePartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset20KeyValuePartitioningE" title="arrow::dataset::KeyValuePartitioning"><span class="n"><span class="pre">KeyValuePartitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Multi-level, directory based partitioning originating from Apache Hive with all data files stored in the leaf directories. </p>
<p>Data is partitioned by static values of a particular column in the schema. Partition keys are represented in the form $key=$value in directory names. <a class="reference internal" href="datatype.html#classarrow_1_1_field"><span class="std std-ref">Field</span></a> order is ignored, as are missing or unrecognized field names.</p>
<p>For example given schema&lt;year:int16, month:int8, day:int8&gt; the path “/day=321/ignored=3.4/year=2009” parses to (“year”_ == 2009 and “day”_ == 321) </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE">
<span id="_CPPv3N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE"></span><span id="_CPPv2N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE"></span><span id="arrow::dataset::HivePartitioning::HivePartitioning__std::shared_ptr:Schema:.ArrayVector.ss"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1a59b84f3bc2f04df60572a3381d5d9a50"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">HivePartitioning</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">ArrayVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">dictionaries</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">null_fallback</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" 
href="#_CPPv424kDefaultHiveNullFallback" title="kDefaultHiveNullFallback"><span class="n"><span class="pre">kDefaultHiveNullFallback</span></span></a><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioning16HivePartitioningENSt10shared_ptrI6SchemaEE11ArrayVectorNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>If a field in schema is of dictionary type, the corresponding element of dictionaries must contain the dictionary of values for that field. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset16HivePartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset16HivePartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset16HivePartitioning9type_nameEv"></span><span id="arrow::dataset::HivePartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1ac8729632a9681195f8c01ac58e2909b3"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset16HivePartitioning9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions">
<span id="_CPPv3N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions"></span><span id="_CPPv2N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions"></span><span id="arrow::dataset::HivePartitioning::MakeFactory__HivePartitioningFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_hive_partitioning_1a4c5fb22aefbf30c6c379cf22c0afb067"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFactory</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset30HivePartitioningFactoryOptionsE" title="arrow::dataset::HivePartitioningFactoryOptions"><span class="n"><span class="pre">HivePartitioningFactoryOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset16HivePartitioning11MakeFactoryE30HivePartitioningFactoryOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a factory for a hive partitioning. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset20FunctionPartitioningE">
<span id="_CPPv3N5arrow7dataset20FunctionPartitioningE"></span><span id="_CPPv2N5arrow7dataset20FunctionPartitioningE"></span><span id="arrow::dataset::FunctionPartitioning"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FunctionPartitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset20FunctionPartitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Implementation provided by lambda or other callable. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20FunctionPartitioning9type_nameEv">
<span id="_CPPv3NK5arrow7dataset20FunctionPartitioning9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset20FunctionPartitioning9type_nameEv"></span><span id="arrow::dataset::FunctionPartitioning::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning_1a392a407e50f8e0d5ead69be21edfa435"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20FunctionPartitioning9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of partitioning. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE">
<span id="_CPPv3NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE"></span><span id="_CPPv2NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE"></span><span id="arrow::dataset::FunctionPartitioning::Parse__ssCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_function_partitioning_1a2764c0494f879fc454851b33f5954e47"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Expression</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Parse</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">path</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset20FunctionPartitioning5ParseERKNSt6stringE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Parse a path into a partition expression. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21PartitioningOrFactoryE">
<span id="_CPPv3N5arrow7dataset21PartitioningOrFactoryE"></span><span id="_CPPv2N5arrow7dataset21PartitioningOrFactoryE"></span><span id="arrow::dataset::PartitioningOrFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">PartitioningOrFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21PartitioningOrFactoryE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/partition.h&gt;</em><p>Either a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> or a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21PartitioningOrFactory12partitioningEv">
<span id="_CPPv3NK5arrow7dataset21PartitioningOrFactory12partitioningEv"></span><span id="_CPPv2NK5arrow7dataset21PartitioningOrFactory12partitioningEv"></span><span id="arrow::dataset::PartitioningOrFactory::partitioningC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1adf893490b52fd3761f75056d360aa040"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21PartitioningOrFactory12partitioningEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The partitioning (if given). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset21PartitioningOrFactory7factoryEv">
<span id="_CPPv3NK5arrow7dataset21PartitioningOrFactory7factoryEv"></span><span id="_CPPv2NK5arrow7dataset21PartitioningOrFactory7factoryEv"></span><span id="arrow::dataset::PartitioningOrFactory::factoryC"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1aeaadd56fdfb179bc6b53e8fda83b9cd3"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19PartitioningFactoryE" title="arrow::dataset::PartitioningFactory"><span class="n"><span class="pre">PartitioningFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">factory</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset21PartitioningOrFactory7factoryEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The partition factory (if given). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::PartitioningOrFactory::GetOrInferSchema__std::vector:ss:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_partitioning_or_factory_1afa9b03e3ed01361864661d267c7c2318"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">GetOrInferSchema</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset21PartitioningOrFactory16GetOrInferSchemaERKNSt6vectorINSt6stringEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the partition schema, inferring it with the given factory if needed. </p>
</dd></dl>
</div>
</dd></dl>
</div>
<div class="section" id="dataset-discovery-factories">
<h2>Dataset discovery/factories<a class="headerlink" href="#dataset-discovery-factories" title="Permalink to this headline"></a></h2>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptionsE">
<span id="_CPPv3N5arrow7dataset14InspectOptionsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptionsE"></span><span id="arrow::dataset::InspectOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">InspectOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptions9fragmentsE">
<span id="_CPPv3N5arrow7dataset14InspectOptions9fragmentsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptions9fragmentsE"></span><span id="arrow::dataset::InspectOptions::fragments__i"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options_1a6a2b84b68816d279b5c3f94a3585b677"></span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptions9fragmentsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate how many fragments should be inspected to infer the unified dataset schema. </p>
<p>Limiting the number of fragments accessed improves the latency of the discovery process when dealing with a high number of fragments and/or high latency file systems.</p>
<p>The default value of <code class="docutils literal notranslate"><span class="pre">1</span></code> inspects the schema of the first (in no particular order) fragment only. If the dataset has a uniform schema for all fragments, this default is the optimal value. In order to inspect all fragments and robustly unify their potentially varying schemas, set this option to <code class="docutils literal notranslate"><span class="pre">kInspectAllFragments</span></code>. A value of <code class="docutils literal notranslate"><span class="pre">0</span></code> disables inspection of fragments altogether so only the partitioning schema will be inspected. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-attributes">Public Static Attributes</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14InspectOptions20kInspectAllFragmentsE">
<span id="_CPPv3N5arrow7dataset14InspectOptions20kInspectAllFragmentsE"></span><span id="_CPPv2N5arrow7dataset14InspectOptions20kInspectAllFragmentsE"></span><span id="arrow::dataset::InspectOptions::kInspectAllFragments__i"></span><span class="target" id="structarrow_1_1dataset_1_1_inspect_options_1ac662ba0cbd1629a62797abc62f584be8"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kInspectAllFragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="o"><span class="pre">-</span></span><span class="m"><span class="pre">1</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14InspectOptions20kInspectAllFragmentsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>See <code class="docutils literal notranslate"><span class="pre">fragments</span></code> property. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptionsE">
<span id="_CPPv3N5arrow7dataset13FinishOptionsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptionsE"></span><span id="arrow::dataset::FinishOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FinishOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions6schemaE">
<span id="_CPPv3N5arrow7dataset13FinishOptions6schemaE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions6schemaE"></span><span id="arrow::dataset::FinishOptions::schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1a02b6442d0b26184f8016ab2b91d1aa8a"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">schema</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions6schemaE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Finalize the dataset with this given schema. </p>
<p>If the schema is not provided, infer it via Inspect (see the <code class="docutils literal notranslate"><span class="pre">inspect_options</span></code> property). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions15inspect_optionsE">
<span id="_CPPv3N5arrow7dataset13FinishOptions15inspect_optionsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions15inspect_optionsE"></span><span id="arrow::dataset::FinishOptions::inspect_options__InspectOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1a14dde92ea1ab7ea05923fb36ba1503d6"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">inspect_options</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions15inspect_optionsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>If the schema is not provided, it will be discovered by passing these options to <code class="docutils literal notranslate"><span class="pre">DatasetFactory::Inspect</span></code>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset13FinishOptions18validate_fragmentsE">
<span id="_CPPv3N5arrow7dataset13FinishOptions18validate_fragmentsE"></span><span id="_CPPv2N5arrow7dataset13FinishOptions18validate_fragmentsE"></span><span id="arrow::dataset::FinishOptions::validate_fragments__b"></span><span class="target" id="structarrow_1_1dataset_1_1_finish_options_1ad26a9261c353cdd02860e38903eac807"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">validate_fragments</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset13FinishOptions18validate_fragmentsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate if the given <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> (when specified) should be validated against the fragments’ schemas. </p>
<p><code class="docutils literal notranslate"><span class="pre">inspect_options</span></code> will control how many fragments are checked. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactoryE">
<span id="_CPPv3N5arrow7dataset14DatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset14DatasetFactoryE"></span><span id="arrow::dataset::DatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">DatasetFactory</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a> provides a way to inspect/discover a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s expected schema before materializing said <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
<p>Subclassed by <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">arrow::dataset::FileSystemDatasetFactory</span></a>, arrow::dataset::ParquetDatasetFactory, <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset_factory"><span class="std std-ref">arrow::dataset::UnionDatasetFactory</span></a></p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::DatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a216b18c920df76f8dbe0eeead9ac3ff1"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a 
class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the schemas of the Fragments and <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory7InspectE14InspectOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory7InspectE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory7InspectE14InspectOptions"></span><span id="arrow::dataset::DatasetFactory::Inspect__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a0a15be09625b8c6ac2b9ef1889b6710a"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="p"><span class="pre">}</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory7InspectE14InspectOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get unified schema for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishEv">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishEv"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishEv"></span><span id="arrow::dataset::DatasetFactory::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1af69a68f1c04cb70b5f3f2cb42a0643ea"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::DatasetFactory::Finish__std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a05f84109ae8c822b27f63cd3fa289ace"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishENSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given schema (see <em>FinishOptions::schema</em>). </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset14DatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::DatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a4744d4cd12db41f9c29433af1fa730b4"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory6FinishE13FinishOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given options. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset14DatasetFactory14root_partitionEv">
<span id="_CPPv3NK5arrow7dataset14DatasetFactory14root_partitionEv"></span><span id="_CPPv2NK5arrow7dataset14DatasetFactory14root_partitionEv"></span><span id="arrow::dataset::DatasetFactory::root_partitionC"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1a1e4b87cd3605ecacbfdcf9ef02301be5"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">root_partition</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset14DatasetFactory14root_partitionEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Optional root partition for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14DatasetFactory16SetRootPartitionE10Expression">
<span id="_CPPv3N5arrow7dataset14DatasetFactory16SetRootPartitionE10Expression"></span><span id="_CPPv2N5arrow7dataset14DatasetFactory16SetRootPartitionE10Expression"></span><span id="arrow::dataset::DatasetFactory::SetRootPartition__Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_dataset_factory_1ab642aaef8fe8fd3e17b2aa882c6bb20d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">SetRootPartition</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">partition</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14DatasetFactory16SetRootPartitionE10Expression" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set the root partition for the resulting <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
</div>
<div class="section" id="scanning">
<h2>Scanning<a class="headerlink" href="#scanning" title="Permalink to this headline"></a></h2>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv426TaggedRecordBatchGenerator">
<span id="_CPPv326TaggedRecordBatchGenerator"></span><span id="_CPPv226TaggedRecordBatchGenerator"></span><span class="target" id="group__dataset-scanning_1gab483ab439e8b34006f3204005b0724ec"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatchGenerator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Future</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">TaggedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv426TaggedRecordBatchGenerator" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv425TaggedRecordBatchIterator">
<span id="_CPPv325TaggedRecordBatchIterator"></span><span id="_CPPv225TaggedRecordBatchIterator"></span><span class="target" id="group__dataset-scanning_1ga26c872b52e47a7e20f901d743df3bef9"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatchIterator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">Iterator</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">TaggedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv425TaggedRecordBatchIterator" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv430EnumeratedRecordBatchGenerator">
<span id="_CPPv330EnumeratedRecordBatchGenerator"></span><span id="_CPPv230EnumeratedRecordBatchGenerator"></span><span class="target" id="group__dataset-scanning_1ga2aab0007c4c0e475197811f993db724e"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatchGenerator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Future</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">EnumeratedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv430EnumeratedRecordBatchGenerator" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp type">
<dt class="sig sig-object cpp" id="_CPPv429EnumeratedRecordBatchIterator">
<span id="_CPPv329EnumeratedRecordBatchIterator"></span><span id="_CPPv229EnumeratedRecordBatchIterator"></span><span class="target" id="group__dataset-scanning_1gac15b45b87696d4c200949df0e7965ea1"></span><span class="k"><span class="pre">using</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatchIterator</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">Iterator</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">EnumeratedRecordBatch</span></span><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv429EnumeratedRecordBatchIterator" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv417kDefaultBatchSize">
<span id="_CPPv317kDefaultBatchSize"></span><span id="_CPPv217kDefaultBatchSize"></span><span id="kDefaultBatchSize__int64_t"></span><span class="target" id="group__dataset-scanning_1ga073228430616d6b28274a6f3e570e5d7"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultBatchSize</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1</span></span><span class="w"> </span><span class="o"><span class="pre">&lt;&lt;</span></span><span class="w"> </span><span class="m"><span class="pre">20</span></span><a class="headerlink" href="#_CPPv417kDefaultBatchSize" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv422kDefaultBatchReadahead">
<span id="_CPPv322kDefaultBatchReadahead"></span><span id="_CPPv222kDefaultBatchReadahead"></span><span id="kDefaultBatchReadahead__int32_t"></span><span class="target" id="group__dataset-scanning_1ga3eb0d0d3032ccacc02cbfe0a94ca5d6c"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultBatchReadahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">32</span></span><a class="headerlink" href="#_CPPv422kDefaultBatchReadahead" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv425kDefaultFragmentReadahead">
<span id="_CPPv325kDefaultFragmentReadahead"></span><span id="_CPPv225kDefaultFragmentReadahead"></span><span id="kDefaultFragmentReadahead__int32_t"></span><span class="target" id="group__dataset-scanning_1ga6f4b8db3cb8c1bcfc6874453d7ba40b4"></span><span class="k"><span class="pre">constexpr</span></span><span class="w"> </span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">kDefaultFragmentReadahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">8</span></span><a class="headerlink" href="#_CPPv425kDefaultFragmentReadahead" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19FragmentScanOptionsE">
<span id="_CPPv3N5arrow7dataset19FragmentScanOptionsE"></span><span id="_CPPv2N5arrow7dataset19FragmentScanOptionsE"></span><span id="arrow::dataset::FragmentScanOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_fragment_scan_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FragmentScanOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/dataset.h&gt;</em><p>Per-scan options for fragment(s) in a dataset. </p>
<p>These options are not intrinsic to the format or fragment itself, but do affect the results of a scan. These are options which make sense to change between repeated reads of the same dataset, such as format-specific conversion options (that do not affect the schema). </p>
<p>Subclassed by arrow::dataset::CsvFragmentScanOptions, arrow::dataset::IpcFragmentScanOptions, arrow::dataset::ParquetFragmentScanOptions</p>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptionsE">
<span id="_CPPv3N5arrow7dataset11ScanOptionsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptionsE"></span><span id="arrow::dataset::ScanOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">ScanOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p>Scan-specific options, which can be changed between scans of the same dataset. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions6filterE">
<span id="_CPPv3N5arrow7dataset11ScanOptions6filterE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions6filterE"></span><span id="arrow::dataset::ScanOptions::filter__Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1aa54e0de57a48424f1e29d48d7f2455d3"></span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">filter</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">literal</span></span><span class="p"><span class="pre">(</span></span><span class="k"><span class="pre">true</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions6filterE" title="Permalink to this definition"></a><br /></dt>
<dd><p>A row filter (which will be pushed down to partitioning/reading if supported). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10projectionE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10projectionE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10projectionE"></span><span id="arrow::dataset::ScanOptions::projection__Expression"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a1c6246446ea41cf037b5691d14002e9d"></span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">projection</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10projectionE" title="Permalink to this definition"></a><br /></dt>
<dd><p>A projection expression (which can add/remove/rename columns). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions14dataset_schemaE">
<span id="_CPPv3N5arrow7dataset11ScanOptions14dataset_schemaE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions14dataset_schemaE"></span><span id="arrow::dataset::ScanOptions::dataset_schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a6fbf0815c71758c41abfa26b06e0a5e1"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">dataset_schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions14dataset_schemaE" title="Permalink to this definition"></a><br /></dt>
<dd><p><a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> with which batches will be read from fragments. </p>
<p>This is also known as the “reader schema”; it will be used (for example) in constructing CSV file readers to identify column types for parsing. Usually only a subset of its fields (see MaterializedFields) will be materialized during a scan. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions16projected_schemaE">
<span id="_CPPv3N5arrow7dataset11ScanOptions16projected_schemaE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions16projected_schemaE"></span><span id="arrow::dataset::ScanOptions::projected_schema__std::shared_ptr:Schema:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a32a9c0b11c73afb80c15686595df8483"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">projected_schema</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions16projected_schemaE" title="Permalink to this definition"></a><br /></dt>
<dd><p><a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a> of projected record batches. </p>
<p>This is independent of dataset_schema as its fields are derived from the projection. For example, let</p>
<p>dataset_schema = {“a”: int32, “b”: int32, “id”: utf8}<br />projection = project({call(“add”, {field_ref(“a”), field_ref(“b”)})}, {“a_plus_b”})</p>
<p>(no filter specified). In this case, the projected_schema would be</p>
<p>{“a_plus_b”: int32} </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10batch_sizeE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10batch_sizeE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10batch_sizeE"></span><span id="arrow::dataset::ScanOptions::batch_size__int64_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a3c28eef3c8804a93c488a4861ad193ee"></span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">batch_size</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv417kDefaultBatchSize" title="kDefaultBatchSize"><span class="n"><span class="pre">kDefaultBatchSize</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10batch_sizeE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Maximum row count for scanned batches. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions15batch_readaheadE">
<span id="_CPPv3N5arrow7dataset11ScanOptions15batch_readaheadE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions15batch_readaheadE"></span><span id="arrow::dataset::ScanOptions::batch_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a129a6281e006cb6681d6d246b0c6a99b"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">batch_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv422kDefaultBatchReadahead" title="kDefaultBatchReadahead"><span class="n"><span class="pre">kDefaultBatchReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions15batch_readaheadE" title="Permalink to this definition"></a><br /></dt>
<dd><p>How many batches to read ahead within a file. </p>
<p>Set to 0 to disable batch readahead.</p>
<p>Note: May not be supported by all formats. Note: May not be supported by all scanners. Note: Will be ignored if use_threads is set to false. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions18fragment_readaheadE">
<span id="_CPPv3N5arrow7dataset11ScanOptions18fragment_readaheadE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions18fragment_readaheadE"></span><span id="arrow::dataset::ScanOptions::fragment_readahead__int32_t"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a63e5bc6675f808e576841fcf13545435"></span><span class="n"><span class="pre">int32_t</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragment_readahead</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv425kDefaultFragmentReadahead" title="kDefaultFragmentReadahead"><span class="n"><span class="pre">kDefaultFragmentReadahead</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions18fragment_readaheadE" title="Permalink to this definition"></a><br /></dt>
<dd><p>How many files to read ahead. </p>
<p>Set to 0 to disable fragment readahead.</p>
<p>Note: May not be enforced by all scanners. Note: Will be ignored if use_threads is set to false. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions4poolE">
<span id="_CPPv3N5arrow7dataset11ScanOptions4poolE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions4poolE"></span><span id="arrow::dataset::ScanOptions::pool__MemoryPoolP"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a90fc341733356314293b9bf414fa4610"></span><a class="reference internal" href="memory.html#_CPPv4N5arrow10MemoryPoolE" title="arrow::MemoryPool"><span class="n"><span class="pre">MemoryPool</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="sig-name descname"><span class="n"><span class="pre">pool</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="memory.html#_CPPv4N5arrow19default_memory_poolEv" title="arrow::default_memory_pool"><span class="n"><span class="pre">default_memory_pool</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions4poolE" title="Permalink to this definition"></a><br /></dt>
<dd><p>A pool from which materialized and scanned arrays will be allocated. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions10io_contextE">
<span id="_CPPv3N5arrow7dataset11ScanOptions10io_contextE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions10io_contextE"></span><span id="arrow::dataset::ScanOptions::io_context__io::IOContext"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a2cbac92967c86d75605244a218ca66d1"></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">IOContext</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">io_context</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions10io_contextE" title="Permalink to this definition"></a><br /></dt>
<dd><p>IOContext for any IO tasks. </p>
<p>Note: The IOContext executor will be ignored if use_threads is set to false </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions11use_threadsE">
<span id="_CPPv3N5arrow7dataset11ScanOptions11use_threadsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions11use_threadsE"></span><span id="arrow::dataset::ScanOptions::use_threads__b"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a14c08f047d28e5a705ef1e275f11a98a"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">use_threads</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions11use_threadsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>If true the scanner will scan in parallel. </p>
<p>Note: If true, this will use threads from both the cpu_executor and the io_context.executor. Note: This must be true in order for any readahead to happen. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions9use_asyncE">
<span id="_CPPv3N5arrow7dataset11ScanOptions9use_asyncE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions9use_asyncE"></span><span id="arrow::dataset::ScanOptions::use_async__b"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1aa9903f4d5ae620c1c35023b17cf2b517"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">use_async</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions9use_asyncE" title="Permalink to this definition"></a><br /></dt>
<dd><p>If true then an asynchronous implementation of the scanner will be used. </p>
<p>This implementation is newer and generally performs better. However, it makes extensive use of threading and is still considered experimental </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset11ScanOptions21fragment_scan_optionsE">
<span id="_CPPv3N5arrow7dataset11ScanOptions21fragment_scan_optionsE"></span><span id="_CPPv2N5arrow7dataset11ScanOptions21fragment_scan_optionsE"></span><span id="arrow::dataset::ScanOptions::fragment_scan_options__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="structarrow_1_1dataset_1_1_scan_options_1a10146664bc67f796084a54c6c961d3c3"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">fragment_scan_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset11ScanOptions21fragment_scan_optionsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Fragment-specific scan options. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8ScanTaskE">
<span id="_CPPv3N5arrow7dataset8ScanTaskE"></span><span id="_CPPv2N5arrow7dataset8ScanTaskE"></span><span id="arrow::dataset::ScanTask"></span><span class="target" id="classarrow_1_1dataset_1_1_scan_task"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">ScanTask</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8ScanTaskE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p>Read record batches from a range of a single data fragment. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_scan_task"><span class="std std-ref">ScanTask</span></a> is meant to be a unit of work to be dispatched. The implementation must be thread-safe and safe for concurrent use. </p>
<p>Subclassed by arrow::dataset::InMemoryScanTask</p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset8ScanTask7ExecuteEv">
<span id="_CPPv3N5arrow7dataset8ScanTask7ExecuteEv"></span><span id="_CPPv2N5arrow7dataset8ScanTask7ExecuteEv"></span><span id="arrow::dataset::ScanTask::Execute"></span><span class="target" id="classarrow_1_1dataset_1_1_scan_task_1a3ca31f90c5bf59165f79de1a5fb5fb0a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Execute</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset8ScanTask7ExecuteEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Iterate through sequence of materialized record batches resulting from the Scan. </p>
<p>Execution semantics are encapsulated in the particular <a class="reference internal" href="#classarrow_1_1dataset_1_1_scan_task"><span class="std std-ref">ScanTask</span></a> implementation </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17TaggedRecordBatchE">
<span id="_CPPv3N5arrow7dataset17TaggedRecordBatchE"></span><span id="_CPPv2N5arrow7dataset17TaggedRecordBatchE"></span><span id="arrow::dataset::TaggedRecordBatch"></span><span class="target" id="structarrow_1_1dataset_1_1_tagged_record_batch"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TaggedRecordBatch</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset17TaggedRecordBatchE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p>Combines a record batch with the fragment that the record batch originated from. </p>
<p>Knowing the source fragment can be useful for debugging &amp; understanding loaded data </p>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset21EnumeratedRecordBatchE">
<span id="_CPPv3N5arrow7dataset21EnumeratedRecordBatchE"></span><span id="_CPPv2N5arrow7dataset21EnumeratedRecordBatchE"></span><span id="arrow::dataset::EnumeratedRecordBatch"></span><span class="target" id="structarrow_1_1dataset_1_1_enumerated_record_batch"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">EnumeratedRecordBatch</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset21EnumeratedRecordBatchE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p>Combines a tagged batch with positional information. </p>
<p>This is returned when scanning batches in an unordered fashion. This information is needed if you ever want to reassemble the batches in order </p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7ScannerE">
<span id="_CPPv3N5arrow7dataset7ScannerE"></span><span id="_CPPv2N5arrow7dataset7ScannerE"></span><span id="arrow::dataset::Scanner"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">Scanner</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7ScannerE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p>A scanner glues together several dataset classes to load in data. </p>
<p>The dataset contains a collection of fragments and partitioning rules.</p>
<p>The fragments identify independently loadable units of data (i.e. each fragment has a potentially unique schema and possibly even format. It should be possible to read fragments in parallel if desired).</p>
<p>The fragment’s format contains the logic necessary to actually create a task to load the fragment into memory. That task may or may not support parallel execution of its own.</p>
<p>The scanner is then responsible for creating scan tasks from every fragment in the dataset and (potentially) sequencing the loaded record batches together.</p>
<p>The scanner should not buffer the entire dataset in memory (unless asked), instead yielding record batches as soon as they are ready to scan. Various readahead properties control how much data is allowed to be scanned before pausing to let a slow consumer catch up.</p>
<p>Today the scanner also handles projection &amp; filtering although that may change in the future. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner4ScanEv">
<span id="_CPPv3N5arrow7dataset7Scanner4ScanEv"></span><span id="_CPPv2N5arrow7dataset7Scanner4ScanEv"></span><span id="arrow::dataset::Scanner::Scan"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a0e9ac851a76432ff37dd35c7a7417842"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ScanTaskIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner4ScanEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The Scan operator returns a stream of <a class="reference internal" href="#classarrow_1_1dataset_1_1_scan_task"><span class="std std-ref">ScanTask</span></a>. </p>
<p>The caller is responsible for dispatching/scheduling said tasks. Tasks should be safe to run in a concurrent fashion and outlive the iterator.</p>
<p>Note: Not supported by the async scanner. Planned for removal from the public API in ARROW-11782. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE">
<span id="_CPPv3N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE"></span><span id="_CPPv2N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a343eb51ca6ada0ff70bf7a868214c1dd"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">function</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="p"><span class="pre">(</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17TaggedRecordBatchE" title="arrow::dataset::TaggedRecordBatch"><span class="n"><span class="pre">TaggedRecordBatch</span></span></a><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">visitor</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner4ScanENSt8functionIF6Status17TaggedRecordBatchEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Apply a visitor to each <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a> as it is scanned. </p>
<p>If multiple threads are used (via use_threads), the visitor will be invoked from those threads and is responsible for any synchronization. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner7ToTableEv">
<span id="_CPPv3N5arrow7dataset7Scanner7ToTableEv"></span><span id="_CPPv2N5arrow7dataset7Scanner7ToTableEv"></span><span id="arrow::dataset::Scanner::ToTable"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a706750f2cd07ba40507a7ba139c78ea5"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ToTable</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner7ToTableEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Convert a <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> into a <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
<p>Use this convenience utility with care. This will serially materialize the Scan result in memory before creating the <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner11ScanBatchesEv">
<span id="_CPPv3N5arrow7dataset7Scanner11ScanBatchesEv"></span><span id="_CPPv2N5arrow7dataset7Scanner11ScanBatchesEv"></span><span id="arrow::dataset::Scanner::ScanBatches"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a8c511a98ed14b65d39d36347a4007612"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv425TaggedRecordBatchIterator" title="TaggedRecordBatchIterator"><span class="n"><span class="pre">TaggedRecordBatchIterator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatches</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner11ScanBatchesEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Scan the dataset into a stream of record batches. </p>
<p>Each batch is tagged with the fragment it originated from. The batches will arrive in order. The order of fragments is determined by the dataset.</p>
<p>Note: The scanner will perform some readahead but will avoid materializing too much in memory (this is governed by the readahead options and use_threads option). If the readahead queue fills up then I/O will pause until the calling thread catches up. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner20ScanBatchesUnorderedEv">
<span id="_CPPv3N5arrow7dataset7Scanner20ScanBatchesUnorderedEv"></span><span id="_CPPv2N5arrow7dataset7Scanner20ScanBatchesUnorderedEv"></span><span id="arrow::dataset::Scanner::ScanBatchesUnordered"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a214ed25513bac9a80544afcc983e5272"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv429EnumeratedRecordBatchIterator" title="EnumeratedRecordBatchIterator"><span class="n"><span class="pre">EnumeratedRecordBatchIterator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesUnordered</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner20ScanBatchesUnorderedEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Scan the dataset into a stream of record batches. </p>
<p>Unlike ScanBatches this method may allow record batches to be returned out of order. This allows for more efficient scanning: some fragments may be accessed more quickly than others (e.g. may be cached in RAM or just happen to get scheduled earlier by the I/O).</p>
<p>To make up for the out-of-order iteration each batch is further tagged with positional information. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner8TakeRowsERK5Array">
<span id="_CPPv3N5arrow7dataset7Scanner8TakeRowsERK5Array"></span><span id="_CPPv2N5arrow7dataset7Scanner8TakeRowsERK5Array"></span><span id="arrow::dataset::Scanner::TakeRows__ArrayCR"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1aaeabbe618600cf97ffc7378d2b7538e6"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">TakeRows</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="array.html#_CPPv4N5arrow5ArrayE" title="arrow::Array"><span class="n"><span class="pre">Array</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">indices</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner8TakeRowsERK5Array" title="Permalink to this definition"></a><br /></dt>
<dd><p>A convenience to synchronously load the given rows by index. </p>
<p>Will only consume as many batches as needed from <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner_1a8c511a98ed14b65d39d36347a4007612"><span class="std std-ref">ScanBatches()</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset7Scanner4HeadE7int64_t">
<span id="_CPPv3N5arrow7dataset7Scanner4HeadE7int64_t"></span><span id="_CPPv2N5arrow7dataset7Scanner4HeadE7int64_t"></span><span id="arrow::dataset::Scanner::Head__int64_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1a110363c9a440d4d51400683ef228e557"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Head</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">num_rows</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset7Scanner4HeadE7int64_t" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the first N rows. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset7Scanner7optionsEv">
<span id="_CPPv3NK5arrow7dataset7Scanner7optionsEv"></span><span id="_CPPv2NK5arrow7dataset7Scanner7optionsEv"></span><span id="arrow::dataset::Scanner::optionsC"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_1adddf7d3dea394e017624ad7ea6c27b71"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">options</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset7Scanner7optionsEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the options for this scan. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilderE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilderE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilderE"></span><span id="arrow::dataset::ScannerBuilder"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">ScannerBuilder</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilderE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/scanner.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner_builder"><span class="std std-ref">ScannerBuilder</span></a> is a factory class to construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a>. </p>
<p>It is used to pass information, notably a potential filter expression and a subset of columns to materialize. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::ScannerBuilder::Project__std::vector:ss:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a15da200cbacbf327be99a3f6952ab21a"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Project</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">columns</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set the subset of columns to materialize. </p>
<p>Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>columns</strong><strong>[in]</strong> list of columns to project. Order and duplicates will be preserved.</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Failure if any column name does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorI10ExpressionEENSt6vectorINSt6stringEEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorI10ExpressionEENSt6vectorINSt6stringEEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorI10ExpressionEENSt6vectorINSt6stringEEE"></span><span id="arrow::dataset::ScannerBuilder::Project__std::vector:Expression:.std::vector:ss:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1ac9024acb461d7c89760795195fd5fe39"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Project</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Expression</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">exprs</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">names</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorI10ExpressionEENSt6vectorINSt6stringEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set expressions which will be evaluated to produce the materialized columns. </p>
<p>Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong><strong>[in]</strong> expressions to evaluate to produce columns. </p></li>
<li><p><strong>names</strong><strong>[in]</strong> list of names for the resulting columns.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Failure if any referenced column does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder6FilterERK10Expression">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder6FilterERK10Expression"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder6FilterERK10Expression"></span><span id="arrow::dataset::ScannerBuilder::Filter__ExpressionCR"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a97810781d70973deeb9832ae6128aace"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Filter</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">filter</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder6FilterERK10Expression" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set the filter expression to return only rows matching the filter. </p>
<p>The predicate will be passed down to Sources and corresponding Fragments to exploit predicate pushdown if possible using partition information or <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> internal metadata, e.g. Parquet statistics. Columns which are not referenced may not be read from fragments.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>filter</strong><strong>[in]</strong> expression to filter rows with.</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Failure if any referenced column does not exist in the dataset’s <a class="reference internal" href="datatype.html#classarrow_1_1_schema"><span class="std std-ref">Schema</span></a>. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder10UseThreadsEb">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder10UseThreadsEb"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder10UseThreadsEb"></span><span id="arrow::dataset::ScannerBuilder::UseThreads__b"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1ab10faa196ec9a3a74d47681f1283097e"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">UseThreads</span></span></span><span class="sig-paren">(</span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">use_threads</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">true</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder10UseThreadsEb" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> should make use of the available ThreadPool found in <a class="reference internal" href="#structarrow_1_1dataset_1_1_scan_options"><span class="std std-ref">ScanOptions</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder8UseAsyncEb">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder8UseAsyncEb"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder8UseAsyncEb"></span><span id="arrow::dataset::ScannerBuilder::UseAsync__b"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a93c9ed0552346d957a7670566b00440b"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">UseAsync</span></span></span><span class="sig-paren">(</span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">use_async</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">true</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder8UseAsyncEb" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> should run in experimental “async” mode. </p>
<p>This mode should have considerably better performance on high-latency or parallel filesystems but is still experimental </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t"></span><span id="arrow::dataset::ScannerBuilder::BatchSize__int64_t"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1adc02c1cc2b9be99cf4d5e2f91cd23458"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">BatchSize</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">int64_t</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">batch_size</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder9BatchSizeE7int64_t" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set the maximum number of rows per <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a>. </p>
<p>
This option provides a control limiting the memory owned by any <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a>. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>batch_size</strong><strong>[in]</strong> the maximum number of rows. </p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>An error if the batch size is not greater than 0.</p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool"></span><span id="arrow::dataset::ScannerBuilder::Pool__MemoryPoolP"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1acc144563c285acf7b0397d9878214d9d"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Pool</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="memory.html#_CPPv4N5arrow10MemoryPoolE" title="arrow::MemoryPool"><span class="n"><span class="pre">MemoryPool</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">pool</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder4PoolEP10MemoryPool" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set the pool from which materialized and scanned arrays will be allocated. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE"></span><span id="arrow::dataset::ScannerBuilder::FragmentScanOptions__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a93f1115f1ddb452e304a23e7ed68cb4c"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FragmentScanOptions</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="arrow::dataset::ScannerBuilder::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">fragment_scan_options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Set fragment-specific scan options. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset14ScannerBuilder6FinishEv">
<span id="_CPPv3N5arrow7dataset14ScannerBuilder6FinishEv"></span><span id="_CPPv2N5arrow7dataset14ScannerBuilder6FinishEv"></span><span id="arrow::dataset::ScannerBuilder::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_scanner_builder_1a5fcc65be277f4e76ef89e16b9799168e"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><span class="n"><span class="pre">Scanner</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset14ScannerBuilder6FinishEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the constructed now-immutable <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a> object. </p>
</dd></dl>
</div>
</dd></dl>
</div>
<div class="section" id="concrete-implementations">
<h2>Concrete implementations<a class="headerlink" href="#concrete-implementations" title="Permalink to this headline"></a></h2>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragmentE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragmentE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragmentE"></span><span id="arrow::dataset::InMemoryFragment"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryFragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragmentE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/dataset.h&gt;</em><p>A trivial <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> that yields <a class="reference internal" href="#classarrow_1_1dataset_1_1_scan_task"><span class="std std-ref">ScanTask</span></a> objects from a fixed set of <a class="reference internal" href="table.html#classarrow_1_1_record_batch"><span class="std std-ref">RecordBatch</span></a> objects. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-in-memory-fragment-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment4ScanENSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::InMemoryFragment::Scan__std::shared_ptr:ScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1a88f855af5b46a5add978c18665264d2e"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ScanTaskIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment4ScanENSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Scan returns an iterator of ScanTasks, each of which yields RecordBatches from this <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
<p>Note that batches yielded using this method will not be filtered and may not align with the <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>’s schema. In particular, note that columns referenced by the filter may be present in yielded batches even if they are not projected (so that they are available when a filter is applied). Additionally, explicitly projected columns may be absent if they were not present in this fragment.</p>
<p>To receive a record batch stream which is fully filtered and projected, use <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::InMemoryFragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_fragment_1a8ed0cd26e0427cac2447a309d5ae3f5d"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16InMemoryFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDatasetE">
<span id="_CPPv3N5arrow7dataset15InMemoryDatasetE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDatasetE"></span><span id="arrow::dataset::InMemoryDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDatasetE" title="Permalink to this definition"></a><br /></dt>
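<dd><em>#include &lt;arrow/dataset/dataset.h&gt;</em><p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> which yields fragments wrapping a stream of record batches. </p>
<p>The record batches must match the schema provided to the dataset at construction. </p>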
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-in-memory-dataset-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Schema:.std::shared_ptr:RecordBatchGenerator:"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ac0db38bfc1de7b4d13240efa6f47662c"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE" title="arrow::dataset::InMemoryDataset::RecordBatchGenerator"><span class="n"><span class="pre">RecordBatchGenerator</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">get_batches</span></span><span class="sig-paren">)</span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEENSt10shared_ptrI20RecordBatchGeneratorEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Construct a dataset from a schema and a factory of record batch iterators. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Schema:.RecordBatchVector"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1a479b929b6e00e4722052a6e7b0a231fe"></span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">RecordBatchVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">batches</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI6SchemaEE17RecordBatchVector" title="Permalink to this definition"></a><br /></dt>
<dd><p>Convenience constructor taking a fixed list of batches. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE"></span><span id="arrow::dataset::InMemoryDataset::InMemoryDataset__std::shared_ptr:Table:"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ab589f1c3da256de2ab3cde77d9b86965"></span><span class="k"><span class="pre">explicit</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InMemoryDataset</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><span class="n"><span class="pre">Table</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">table</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset15InMemoryDatasetENSt10shared_ptrI5TableEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Convenience constructor taking a <a class="reference internal" href="table.html#classarrow_1_1_table"><span class="std std-ref">Table</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset15InMemoryDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset15InMemoryDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset15InMemoryDataset9type_nameEv"></span><span id="arrow::dataset::InMemoryDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1ae39b5d039ae08ed1c6e17bfe1641f380"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset15InMemoryDataset9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::InMemoryDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1a28d46d3d11d41e4d12f7157a57b6eca4"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset15InMemoryDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
</div>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE">
<span id="_CPPv3N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE"></span><span id="_CPPv2N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE"></span><span id="arrow::dataset::InMemoryDataset::RecordBatchGenerator"></span><span class="target" id="classarrow_1_1dataset_1_1_in_memory_dataset_1_1_record_batch_generator"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">RecordBatchGenerator</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset15InMemoryDataset20RecordBatchGeneratorE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/dataset.h&gt;</em></dd></dl>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12UnionDatasetE">
<span id="_CPPv3N5arrow7dataset12UnionDatasetE"></span><span id="_CPPv2N5arrow7dataset12UnionDatasetE"></span><span id="arrow::dataset::UnionDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">UnionDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset12UnionDatasetE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/dataset.h&gt;</em><p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> wrapping child Datasets. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-union-dataset-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12UnionDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset12UnionDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset12UnionDataset9type_nameEv"></span><span id="arrow::dataset::UnionDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a87c9d5cb26b147127ce28259298ce52a"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12UnionDataset9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::UnionDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a12415d746f8c71d5af09e53818779aae"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset12UnionDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return a copy of this <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with a different schema. </p>
<p>The copy will view the same Fragments. If the new schema is not compatible with the original dataset’s schema then an error will be raised. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector">
<span id="_CPPv3N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector"></span><span id="_CPPv2N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector"></span><span id="arrow::dataset::UnionDataset::Make__std::shared_ptr:Schema:.DatasetVector"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_1a9005125d1f96e53f41bad18a9e28b5b4"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12UnionDatasetE" title="arrow::dataset::UnionDataset"><span class="n"><span class="pre">UnionDataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">DatasetVector</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">children</span></span><span 
class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset12UnionDataset4MakeENSt10shared_ptrI6SchemaEE13DatasetVector" title="Permalink to this definition"></a><br /></dt>
<dd><p>Construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">UnionDataset</span></a> wrapping child Datasets. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> <strong>[in]</strong> – the schema of the resulting dataset. </p></li>
<li><p><strong>children</strong> <strong>[in]</strong> – one or more child Datasets. Their schemas must be identical to schema. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactoryE">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactoryE"></span><span id="arrow::dataset::UnionDatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">UnionDatasetFactory</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset19UnionDatasetFactoryE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a> provides a way to inspect/discover a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>’s expected schema before materialization. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset19UnionDatasetFactory9factoriesEv">
<span id="_CPPv3NK5arrow7dataset19UnionDatasetFactory9factoriesEv"></span><span id="_CPPv2NK5arrow7dataset19UnionDatasetFactory9factoriesEv"></span><span id="arrow::dataset::UnionDatasetFactory::factoriesC"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ab8c9b0708cbd6c7c5780d4f05b577b52"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">factories</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset19UnionDatasetFactory9factoriesEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the list of child <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset_factory"><span class="std std-ref">DatasetFactory</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::UnionDatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ad749b1d98f15204a9680b4ba06e4575b"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset19UnionDatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the schemas of the Datasets. </p>
<p>Instead of applying options globally, they are applied to each child factory individually. This will not respect <code class="docutils literal notranslate"><span class="pre">options.fragments</span></code> exactly, but will respect the spirit of peeking at the first fragments or all of them. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::UnionDatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_union_dataset_factory_1ae3c038ae756ed8e63a61a0138d8dc0a5"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset19UnionDatasetFactory6FinishE13FinishOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
</div>
</dd></dl>
<div class="section" id="file-system-datasets">
<h3>File System Datasets<a class="headerlink" href="#file-system-datasets" title="Permalink to this headline"></a></h3>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptionsE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptionsE"></span><span id="arrow::dataset::FileSystemFactoryOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions12partitioningE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions12partitioningE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions12partitioningE"></span><span id="arrow::dataset::FileSystemFactoryOptions::partitioning__PartitioningOrFactory"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1a69af22fb504696928af8963c04af38bb"></span><a class="reference internal" href="#_CPPv4N5arrow7dataset21PartitioningOrFactoryE" title="arrow::dataset::PartitioningOrFactory"><span class="n"><span class="pre">PartitioningOrFactory</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12Partitioning7DefaultEv" title="arrow::dataset::Partitioning::Default"><span class="n"><span class="pre">Default</span></span></a><span class="p"><span class="pre">(</span></span><span class="p"><span class="pre">)</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions12partitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Either an explicit <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> or a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning_factory"><span class="std std-ref">PartitioningFactory</span></a> to discover one. </p>
<p>If a factory is provided, it will be used to infer a schema for partition fields based on file and directory paths then construct a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. The default is a <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> which will yield no partition information.</p>
<p>The (explicit or discovered) partitioning will be applied to discovered files and the resulting partition information embedded in the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE"></span><span id="arrow::dataset::FileSystemFactoryOptions::partition_base_dir__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1a5bfddc4e322fab63c777587eecfcf71f"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partition_base_dir</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions18partition_base_dirE" title="Permalink to this definition"></a><br /></dt>
<dd><p>For the purposes of applying the partitioning, paths will be stripped of the partition_base_dir. </p>
<p>Files not matching the partition_base_dir prefix will be skipped for partition discovery. The ignored files will still be part of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a>, but will not have partition information.</p>
<p>Example: partition_base_dir = “/dataset”;</p>
<ul class="simple">
<li><p>“/dataset/US/sales.csv” -&gt; “US/sales.csv” will be given to the partitioning</p></li>
<li><p>“/home/john/late_sales.csv” -&gt; Will be ignored for partition discovery.</p></li>
</ul>
<p>This is useful for a partitioning which parses directory paths where ordering is important, e.g. <a class="reference internal" href="#classarrow_1_1dataset_1_1_directory_partitioning"><span class="std std-ref">DirectoryPartitioning</span></a>. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE"></span><span id="arrow::dataset::FileSystemFactoryOptions::exclude_invalid_files__b"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1afe6dcef4df15e0f633068fde243e58c9"></span><span class="kt"><span class="pre">bool</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">exclude_invalid_files</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="k"><span class="pre">false</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions21exclude_invalid_filesE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Invalid files (via selector or explicitly) will be excluded by checking with the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_format_1ae934a7b69d0402f36aecfa6037fa1343"><span class="std std-ref">FileFormat::IsSupported</span></a> method. </p>
<p>This will incur IO for each file in a serial, single-threaded fashion. Disabling this feature will skip the IO, but unsupported files may be present in the <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> (resulting in an error at scan time). </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE">
<span id="_CPPv3N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE"></span><span id="_CPPv2N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE"></span><span id="arrow::dataset::FileSystemFactoryOptions::selector_ignore_prefixes__std::vector:ss:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_factory_options_1af00c6508a0956e77d535c7cd764b33a4"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">selector_ignore_prefixes</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="p"><span class="pre">{</span></span><span class="s"><span class="pre">&quot;.&quot;</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">&quot;_&quot;</span></span><span class="p"><span class="pre">,</span></span><span class="p"><span class="pre">}</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptions24selector_ignore_prefixesE" title="Permalink to this definition"></a><br /></dt>
<dd><p>When discovering from a Selector (and not from an explicit file list), ignore files and directories matching any of these prefixes. </p>
<p>Example (with selector = “/dataset/**”): selector_ignore_prefixes = {“_”, “.DS_STORE”};</p>
<ul class="simple">
<li><p>“/dataset/data.csv” -&gt; not ignored</p></li>
<li><p>“/dataset/_metadata” -&gt; ignored</p></li>
<li><p>“/dataset/.DS_STORE” -&gt; ignored</p></li>
<li><p>“/dataset/_hidden/dat” -&gt; ignored</p></li>
<li><p>“/dataset/nested/.DS_STORE” -&gt; ignored </p></li>
</ul>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactoryE"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactoryE"></span><span id="arrow::dataset::FileSystemDatasetFactory"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDatasetFactory</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/discovery.h&gt;</em><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> creates a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> from a vector of <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_info"><span class="std std-ref">fs::FileInfo</span></a> or a <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_selector"><span class="std std-ref">fs::FileSelector</span></a>. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::InspectSchemas__InspectOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a89747f36564fd22e1110048aa3607535"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">InspectSchemas</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset14InspectOptionsE" title="arrow::dataset::InspectOptions"><span class="n"><span class="pre">InspectOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" 
href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory14InspectSchemasE14InspectOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get the schemas of the Fragments and <a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Finish__FinishOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1ac2f094573a36e0b76601a5e6c0517c9a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset13FinishOptionsE" title="arrow::dataset::FinishOptions"><span class="n"><span class="pre">FinishOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory6FinishE13FinishOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> with the given options. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__std::shared_ptr:fs::FileSystem:.std::vector:ss:CR.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a2be4ad879d3e94308e8ae08b7d148044"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" 
title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">paths</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEERKNSt6vectorINSt6stringEEENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from an explicit list of paths. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filesystem</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>paths</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>format</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__std::shared_ptr:fs::FileSystem:.fs::FileSelector.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1a86b89c3ebe5cdab3cf7f7dbd9a633359"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" 
title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs12FileSelectorE" title="arrow::fs::FileSelector"><span class="n"><span class="pre">FileSelector</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">selector</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt10shared_ptrIN2fs10FileSystemEEEN2fs12FileSelectorENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from a <a class="reference internal" href="filesystem.html#structarrow_1_1fs_1_1_file_selector"><span class="std std-ref">fs::FileSelector</span></a>. </p>
<p>The selector will expand to a vector of FileInfo. The expansion/crawling is performed in this function call. Thus, the finalized <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> works with a snapshot of the filesystem. If options.partition_base_dir is not provided, it will be set to selector.base_dir.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filesystem</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>selector</strong><strong>[in]</strong> used to crawl and search files </p></li>
<li><p><strong>format</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions">
<span id="_CPPv3N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="_CPPv2N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions"></span><span id="arrow::dataset::FileSystemDatasetFactory::Make__ss.std::shared_ptr:FileFormat:.FileSystemFactoryOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_factory_1ab3e9380381cab2b628c4b23ae8690983"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><span class="n"><span class="pre">DatasetFactory</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">uri</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" 
title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <a class="reference internal" href="#_CPPv4N5arrow7dataset24FileSystemFactoryOptionsE" title="arrow::dataset::FileSystemFactoryOptions"><span class="n"><span class="pre">FileSystemFactoryOptions</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset24FileSystemDatasetFactory4MakeENSt6stringENSt10shared_ptrI10FileFormatEE24FileSystemFactoryOptions" title="Permalink to this definition"></a><br /></dt>
<dd><p>Build a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset_factory"><span class="std std-ref">FileSystemDatasetFactory</span></a> from a URI that includes filesystem information. </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>uri</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>format</strong><strong>[in]</strong> passed to <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> </p></li>
<li><p><strong>options</strong><strong>[in]</strong> see <a class="reference internal" href="#structarrow_1_1dataset_1_1_file_system_factory_options"><span class="std std-ref">FileSystemFactoryOptions</span></a> for more information. </p></li>
</ul>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileSourceE">
<span id="_CPPv3N5arrow7dataset10FileSourceE"></span><span id="_CPPv2N5arrow7dataset10FileSourceE"></span><span id="arrow::dataset::FileSource"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileSource</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileSourceE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>The path and filesystem where an actual file is located or a buffer which can be read like a file. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource11compressionEv">
<span id="_CPPv3NK5arrow7dataset10FileSource11compressionEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource11compressionEv"></span><span id="arrow::dataset::FileSource::compressionC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a083a2d7e4b982b4f2df40faa1c252fab"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="n"><span class="pre">Compression</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="utilities.html#_CPPv4N5arrow11Compression4typeE" title="arrow::Compression::type"><span class="n"><span class="pre">type</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">compression</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource11compressionEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the type of raw compression on the file, if any. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource4pathEv">
<span id="_CPPv3NK5arrow7dataset10FileSource4pathEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource4pathEv"></span><span id="arrow::dataset::FileSource::pathC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1ac357952408c00a9c4dcc347a237c759d"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">path</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource4pathEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the file path, if any. Only valid when file source wraps a path. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource10filesystemEv">
<span id="_CPPv3NK5arrow7dataset10FileSource10filesystemEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource10filesystemEv"></span><span id="arrow::dataset::FileSource::filesystemC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a364996bb9fcb7ba51b852c4d82769a70"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource10filesystemEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the filesystem, if any. Otherwise returns nullptr. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource6bufferEv">
<span id="_CPPv3NK5arrow7dataset10FileSource6bufferEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource6bufferEv"></span><span id="arrow::dataset::FileSource::bufferC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a5d289e2aced706dc4f923cf4af801fdd"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="memory.html#_CPPv4N5arrow6BufferE" title="arrow::Buffer"><span class="n"><span class="pre">Buffer</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">buffer</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource6bufferEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the buffer containing the file, if any. Otherwise returns nullptr. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource4OpenEv">
<span id="_CPPv3NK5arrow7dataset10FileSource4OpenEv"></span><span id="_CPPv2NK5arrow7dataset10FileSource4OpenEv"></span><span id="arrow::dataset::FileSource::OpenC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1a08a746f43fce78b9c9a1e12aa4367ee7"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io16RandomAccessFileE" title="arrow::io::RandomAccessFile"><span class="n"><span class="pre">RandomAccessFile</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Open</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource4OpenEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get a RandomAccessFile which views this file source. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileSource14OpenCompressedEN4util8optionalIN11Compression4typeEEE">
<span id="_CPPv3NK5arrow7dataset10FileSource14OpenCompressedEN4util8optionalIN11Compression4typeEEE"></span><span id="_CPPv2NK5arrow7dataset10FileSource14OpenCompressedEN4util8optionalIN11Compression4typeEEE"></span><span id="arrow::dataset::FileSource::OpenCompressed__util::optional:Compression::type:C"></span><span class="target" id="classarrow_1_1dataset_1_1_file_source_1ab7e0bcd3347078cc6cbdbea3056b0c0b"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io11InputStreamE" title="arrow::io::InputStream"><span class="n"><span class="pre">InputStream</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">OpenCompressed</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">util</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">optional</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">Compression</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="utilities.html#_CPPv4N5arrow11Compression4typeE" title="arrow::Compression::type"><span class="n"><span class="pre">type</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span 
class="pre">compression</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">util</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">nullopt</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileSource14OpenCompressedEN4util8optionalIN11Compression4typeEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get an InputStream which views this file source (and decompresses if needed). </p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>compression</strong><strong>[in]</strong> If nullopt, guess the compression scheme from the filename, else decompress with the given codec </p>
</dd>
</dl>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormatE">
<span id="_CPPv3N5arrow7dataset10FileFormatE"></span><span id="_CPPv2N5arrow7dataset10FileFormatE"></span><span id="arrow::dataset::FileFormat"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileFormat</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">enable_shared_from_this</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormatE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>Base class for file format implementation. </p>
<p>Subclassed by arrow::dataset::CsvFileFormat, arrow::dataset::IpcFileFormat, arrow::dataset::ParquetFileFormat</p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat9type_nameEv">
<span id="_CPPv3NK5arrow7dataset10FileFormat9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset10FileFormat9type_nameEv"></span><span id="arrow::dataset::FileFormat::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1ab089438dfec369581face93a00fdeb08"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>The name identifying the kind of file format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource">
<span id="_CPPv3NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource"></span><span id="arrow::dataset::FileFormat::IsSupported__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1ae934a7b69d0402f36aecfa6037fa1343"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="kt"><span class="pre">bool</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">IsSupported</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat11IsSupportedERK10FileSource" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate if the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a> is supported/readable by this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat7InspectERK10FileSource">
<span id="_CPPv3NK5arrow7dataset10FileFormat7InspectERK10FileSource"></span><span id="_CPPv2NK5arrow7dataset10FileFormat7InspectERK10FileSource"></span><span id="arrow::dataset::FileFormat::Inspect__FileSourceCRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a9c4a4144a9dd512a9bb1a165bb3961c9"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Inspect</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">source</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat7InspectERK10FileSource" title="Permalink to this 
definition"></a><br /></dt>
<dd><p>Return the schema of the file if possible. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat8ScanFileERKNSt10shared_ptrI11ScanOptionsEERKNSt10shared_ptrI12FileFragmentEE">
<span id="_CPPv3NK5arrow7dataset10FileFormat8ScanFileERKNSt10shared_ptrI11ScanOptionsEERKNSt10shared_ptrI12FileFragmentEE"></span><span id="_CPPv2NK5arrow7dataset10FileFormat8ScanFileERKNSt10shared_ptrI11ScanOptionsEERKNSt10shared_ptrI12FileFragmentEE"></span><span id="arrow::dataset::FileFormat::ScanFile__std::shared_ptr:ScanOptions:CR.std::shared_ptr:FileFragment:CRC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a9977e5c557a67ee509d36afde97c9a7a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ScanTaskIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanFile</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span>, <span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a 
class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">file</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat8ScanFileERKNSt10shared_ptrI11ScanOptionsEERKNSt10shared_ptrI12FileFragmentEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Open a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for scanning. </p>
<p>May populate lazy properties of the <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10ExpressionNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10ExpressionNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10ExpressionNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.Expression.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a8353a87948463ba231e90ce4e12e154a"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">partition_expression</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span 
class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">physical_schema</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10ExpressionNSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Open a fragment. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10Expression">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10Expression"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10Expression"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.Expression"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a0211b20504ca412bfb996f08246935e8"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">partition_expression</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSource10Expression" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE">
<span id="_CPPv3N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileFormat::MakeFragment__FileSource.std::shared_ptr:Schema:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a9e605ca731c0c18f2d34291aedeadb30"></span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeFragment</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileSourceE" title="arrow::dataset::FileSource"><span class="n"><span class="pre">FileSource</span></span></a><span class="w"> </span><span class="n sig-param"><span class="pre">source</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span 
class="pre">physical_schema</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="n"><span class="pre">NULLPTR</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat12MakeFragmentE10FileSourceNSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> for a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_source"><span class="std std-ref">FileSource</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEE">
<span id="_CPPv3NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEE"></span><span id="_CPPv2NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEE"></span><span id="arrow::dataset::FileFormat::MakeWriter__std::shared_ptr:io::OutputStream:.std::shared_ptr:Schema:.std::shared_ptr:FileWriteOptions:C"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a0becfc1089824d16ec11c5cb3f9dd962"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileWriterE" title="arrow::dataset::FileWriter"><span class="n"><span class="pre">FileWriter</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">MakeWriter</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">io</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><span class="n"><span class="pre">OutputStream</span></span></a><span 
class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">destination</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset10FileFormat10MakeWriterENSt10shared_ptrIN2io12OutputStreamEEENSt10shared_ptrI6SchemaEENSt10shared_ptrI16FileWriteOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a writer for this format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat19DefaultWriteOptionsEv">
<span id="_CPPv3N5arrow7dataset10FileFormat19DefaultWriteOptionsEv"></span><span id="_CPPv2N5arrow7dataset10FileFormat19DefaultWriteOptionsEv"></span><span id="arrow::dataset::FileFormat::DefaultWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1aec4ca5f3edb4c2d184dad6a5ef05ad20"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">DefaultWriteOptions</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat19DefaultWriteOptionsEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Get default write options for this format. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileFormat29default_fragment_scan_optionsE">
<span id="_CPPv3N5arrow7dataset10FileFormat29default_fragment_scan_optionsE"></span><span id="_CPPv2N5arrow7dataset10FileFormat29default_fragment_scan_optionsE"></span><span id="arrow::dataset::FileFormat::default_fragment_scan_options__std::shared_ptr:FragmentScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_format_1a0fca7a90a8f73961d34f8c62e9ebedcf"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><span class="n"><span class="pre">FragmentScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">default_fragment_scan_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileFormat29default_fragment_scan_optionsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Options affecting how this format is scanned. </p>
<p>The options here can be overridden at scan time. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragmentE">
<span id="_CPPv3N5arrow7dataset12FileFragmentE"></span><span id="_CPPv2N5arrow7dataset12FileFragmentE"></span><span id="arrow::dataset::FileFragment"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileFragment</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset8FragmentE" title="arrow::dataset::Fragment"><span class="n"><span class="pre">Fragment</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragmentE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a> that is stored in a file with a known format. </p>
<p>Subclassed by arrow::dataset::ParquetFileFragment</p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment4ScanENSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset12FileFragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset12FileFragment4ScanENSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::FileFragment::Scan__std::shared_ptr:ScanOptions:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1a75e7f4660a81f88d6e0c71f63550039f"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">ScanTaskIterator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Scan</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment4ScanENSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Scan returns an iterator of ScanTasks, each of which yields RecordBatches from this <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>. </p>
<p>Note that batches yielded using this method will not be filtered and may not align with the <a class="reference internal" href="#classarrow_1_1dataset_1_1_fragment"><span class="std std-ref">Fragment</span></a>’s schema. In particular, note that columns referenced by the filter may be present in yielded batches even if they are not projected (so that they are available when a filter is applied). Additionally, explicitly projected columns may be absent if they were not present in this fragment.</p>
<p>To receive a record batch stream which is fully filtered and projected, use <a class="reference internal" href="#classarrow_1_1dataset_1_1_scanner"><span class="std std-ref">Scanner</span></a>. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE">
<span id="_CPPv3N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="_CPPv2N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE"></span><span id="arrow::dataset::FileFragment::ScanBatchesAsync__std::shared_ptr:ScanOptions:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_file_fragment_1a521083e64f25370d66a39b87d8a28839"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">RecordBatchGenerator</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ScanBatchesAsync</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset11ScanOptionsE" title="arrow::dataset::ScanOptions"><span class="n"><span class="pre">ScanOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">options</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset12FileFragment16ScanBatchesAsyncERKNSt10shared_ptrI11ScanOptionsEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>An asynchronous version of Scan. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDatasetE">
<span id="_CPPv3N5arrow7dataset17FileSystemDatasetE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDatasetE"></span><span id="arrow::dataset::FileSystemDataset"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDataset</span></span></span><span class="w"> </span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="k"><span class="pre">public</span></span><span class="w"> </span><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDatasetE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_dataset"><span class="std std-ref">Dataset</span></a> of FileFragments. </p>
<p>A <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a> is composed of one or more <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a> objects. The fragments are independent and don’t need to share the same format and/or filesystem. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset9type_nameEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset9type_nameEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset9type_nameEv"></span><span id="arrow::dataset::FileSystemDataset::type_nameC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a9efe37fcf02fce6fe246c9a4a8163190"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">type_name</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset9type_nameEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the type name of the dataset. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE"></span><span id="arrow::dataset::FileSystemDataset::ReplaceSchema__std::shared_ptr:Schema:C"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a652887883af7444deee7c87f8e4b59a2"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><span class="n"><span class="pre">Dataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">ReplaceSchema</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="k"><span 
class="pre">override</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset13ReplaceSchemaENSt10shared_ptrI6SchemaEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Replace the schema of the dataset. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset5filesEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset5filesEv"></span><span id="arrow::dataset::FileSystemDataset::filesC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1acb41216cc0c88906fec2a169093875f4"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">files</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the paths of the files. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset6formatEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset6formatEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset6formatEv"></span><span id="arrow::dataset::FileSystemDataset::formatC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a271030810dabf0c541d27eeea16abdd2"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">format</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset6formatEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the format. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4NK5arrow7dataset17FileSystemDataset10filesystemEv">
<span id="_CPPv3NK5arrow7dataset17FileSystemDataset10filesystemEv"></span><span id="_CPPv2NK5arrow7dataset17FileSystemDataset10filesystemEv"></span><span id="arrow::dataset::FileSystemDataset::filesystemC"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a5e059df7544b7f3c6e7ae0ea11407c39"></span><span class="k"><span class="pre">inline</span></span><span class="w"> </span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><span class="w"> </span><span class="k"><span class="pre">const</span></span><a class="headerlink" href="#_CPPv4NK5arrow7dataset17FileSystemDataset10filesystemEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Return the filesystem. May be nullptr if the fragments wrap buffers. </p>
</dd></dl>
</div>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-static-functions">Public Static Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEE10ExpressionNSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEE">
<span id="_CPPv3N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEE10ExpressionNSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEE10ExpressionNSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEE"></span><span id="arrow::dataset::FileSystemDataset::Make__std::shared_ptr:Schema:.Expression.std::shared_ptr:FileFormat:.std::shared_ptr:fs::FileSystem:.std::vector:std::shared_ptr:FileFragment::"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1a2da5853198de992f736482b2501a5d7a"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4I0EN5arrow6ResultE" title="arrow::Result"><span class="n"><span class="pre">Result</span></span></a><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset17FileSystemDatasetE" title="arrow::dataset::FileSystemDataset"><span class="n"><span class="pre">FileSystemDataset</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Make</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="datatype.html#_CPPv4N5arrow6SchemaE" title="arrow::Schema"><span 
class="n"><span class="pre">Schema</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">schema</span></span>, <span class="n"><span class="pre">Expression</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">root_partition</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><span class="n"><span class="pre">FileFormat</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">format</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">filesystem</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">vector</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12FileFragmentE" 
title="arrow::dataset::FileFragment"><span class="n"><span class="pre">FileFragment</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">fragments</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDataset4MakeENSt10shared_ptrI6SchemaEE10ExpressionNSt10shared_ptrI10FileFormatEENSt10shared_ptrIN2fs10FileSystemEEENSt6vectorINSt10shared_ptrI12FileFragmentEEEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Create a <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_system_dataset"><span class="std std-ref">FileSystemDataset</span></a>. </p>
<p>
Note that fragments wrapping files resident in differing filesystems are not permitted; to work with multiple filesystems use a <a class="reference internal" href="#classarrow_1_1dataset_1_1_union_dataset"><span class="std std-ref">UnionDataset</span></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> <strong>[in]</strong> the schema of the dataset </p></li>
<li><p><strong>root_partition</strong> <strong>[in]</strong> the partition expression of the dataset </p></li>
<li><p><strong>format</strong> <strong>[in]</strong> the format of each <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a>. </p></li>
<li><p><strong>filesystem</strong> <strong>[in]</strong> the filesystem of each <a class="reference internal" href="#classarrow_1_1dataset_1_1_file_fragment"><span class="std std-ref">FileFragment</span></a>, or nullptr if the fragments wrap buffers. </p></li>
<li><p><strong>fragments</strong> <strong>[in]</strong> list of fragments to create the dataset from.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>A constructed dataset. </p>
</dd>
</dl>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE">
<span id="_CPPv3N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE"></span><span id="_CPPv2N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE"></span><span id="arrow::dataset::FileSystemDataset::Write__FileSystemDatasetWriteOptionsCR.std::shared_ptr:Scanner:"></span><span class="target" id="classarrow_1_1dataset_1_1_file_system_dataset_1acf725306aea3c8b46c34a2c160147e49"></span><span class="k"><span class="pre">static</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><a class="reference internal" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><span class="n"><span class="pre">FileSystemDatasetWriteOptions</span></span></a><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">write_options</span></span>, <span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><span class="n"><span class="pre">Scanner</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">scanner</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset17FileSystemDataset5WriteERK29FileSystemDatasetWriteOptionsNSt10shared_ptrI7ScannerEE" 
title="Permalink to this definition"></a><br /></dt>
<dd><p>Write a dataset. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset16FileWriteOptionsE">
<span id="_CPPv3N5arrow7dataset16FileWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset16FileWriteOptionsE"></span><span id="arrow::dataset::FileWriteOptions"></span><span class="target" id="classarrow_1_1dataset_1_1_file_write_options"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">FileWriteOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>Options for writing a file of this format. </p>
<p>Subclassed by arrow::dataset::IpcFileWriteOptions, arrow::dataset::ParquetFileWriteOptions</p>
</dd></dl>
<dl class="cpp class">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriterE">
<span id="_CPPv3N5arrow7dataset10FileWriterE"></span><span id="_CPPv2N5arrow7dataset10FileWriterE"></span><span id="arrow::dataset::FileWriter"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer"></span><span class="k"><span class="pre">class</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileWriter</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriterE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>A writer for this format. </p>
<p>Subclassed by arrow::dataset::IpcFileWriter, arrow::dataset::ParquetFileWriter</p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-functions">Public Functions</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE">
<span id="_CPPv3N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="_CPPv2N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE"></span><span id="arrow::dataset::FileWriter::Write__std::shared_ptr:RecordBatch:CR"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1ab2d336af7fa7b0e7233f4faf7faaa7a6"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><span class="k"><span class="pre">const</span></span><span class="w"> </span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><span class="n"><span class="pre">RecordBatch</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="p"><span class="pre">&amp;</span></span><span class="n sig-param"><span class="pre">batch</span></span><span class="sig-paren">)</span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">0</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter5WriteERKNSt10shared_ptrI11RecordBatchEE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Write the given batch. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader">
<span id="_CPPv3N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader"></span><span id="_CPPv2N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader"></span><span id="arrow::dataset::FileWriter::Write__RecordBatchReaderP"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1a7be5615da56f7ee21b02805c869a7b91"></span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Write</span></span></span><span class="sig-paren">(</span><a class="reference internal" href="table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><span class="n"><span class="pre">RecordBatchReader</span></span></a><span class="w"> </span><span class="p"><span class="pre">*</span></span><span class="n sig-param"><span class="pre">batches</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter5WriteEP17RecordBatchReader" title="Permalink to this definition"></a><br /></dt>
<dd><p>Write all batches from the reader. </p>
</dd></dl>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset10FileWriter6FinishEv">
<span id="_CPPv3N5arrow7dataset10FileWriter6FinishEv"></span><span id="_CPPv2N5arrow7dataset10FileWriter6FinishEv"></span><span id="arrow::dataset::FileWriter::Finish"></span><span class="target" id="classarrow_1_1dataset_1_1_file_writer_1aa65d6e19a015b71fd1cfe3ea8e965611"></span><span class="k"><span class="pre">virtual</span></span><span class="w"> </span><a class="reference internal" href="support.html#_CPPv4N5arrow6StatusE" title="arrow::Status"><span class="n"><span class="pre">Status</span></span></a><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">Finish</span></span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv4N5arrow7dataset10FileWriter6FinishEv" title="Permalink to this definition"></a><br /></dt>
<dd><p>Indicate that writing is done. </p>
</dd></dl>
</div>
</dd></dl>
<dl class="cpp struct">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options"></span><span class="k"><span class="pre">struct</span></span><span class="w"> </span><span class="sig-prename descclassname"><span class="n"><span class="pre">arrow</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">dataset</span></span><span class="p"><span class="pre">::</span></span></span><span class="sig-name descname"><span class="n"><span class="pre">FileSystemDatasetWriteOptions</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="Permalink to this definition"></a><br /></dt>
<dd><em>#include &lt;arrow/dataset/file_base.h&gt;</em><p>Options for writing a dataset. </p>
<div class="breathe-sectiondef docutils container">
<p class="breathe-sectiondef-title rubric" id="breathe-section-title-public-members">Public Members</p>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::file_write_options__std::shared_ptr:FileWriteOptions:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a03a653d68b2c626088f7fe4c07a21493"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset16FileWriteOptionsE" title="arrow::dataset::FileWriteOptions"><span class="n"><span class="pre">FileWriteOptions</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">file_write_options</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions18file_write_optionsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Options for individual fragment writing. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::filesystem__std::shared_ptr:fs::FileSystem:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a15f1d8b08b576ae26b8f0c6e0bf78f61"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><span class="n"><span class="pre">fs</span></span><span class="p"><span class="pre">::</span></span><a class="reference internal" href="filesystem.html#_CPPv4N5arrow2fs10FileSystemE" title="arrow::fs::FileSystem"><span class="n"><span class="pre">FileSystem</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">filesystem</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions10filesystemE" title="Permalink to this definition"></a><br /></dt>
<dd><p>FileSystem into which a dataset will be written. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::base_dir__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a9111e2576afd0b716de9e5db388e12ce"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">base_dir</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions8base_dirE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Root directory into which the dataset will be written. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::partitioning__std::shared_ptr:Partitioning:"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a779361c86adf6237e58cb65935fc9bad"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">shared_ptr</span></span><span class="p"><span class="pre">&lt;</span></span><a class="reference internal" href="#_CPPv4N5arrow7dataset12PartitioningE" title="arrow::dataset::Partitioning"><span class="n"><span class="pre">Partitioning</span></span></a><span class="p"><span class="pre">&gt;</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">partitioning</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions12partitioningE" title="Permalink to this definition"></a><br /></dt>
<dd><p><a class="reference internal" href="#classarrow_1_1dataset_1_1_partitioning"><span class="std std-ref">Partitioning</span></a> used to generate fragment paths. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::max_partitions__i"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a2bca12ddd6e2c3252253cb0feb232f80"></span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">max_partitions</span></span></span><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="m"><span class="pre">1024</span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions14max_partitionsE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Maximum number of partitions any batch may be written into; the default is 1024. </p>
</dd></dl>
<dl class="cpp var">
<dt class="sig sig-object cpp" id="_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE">
<span id="_CPPv3N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE"></span><span id="_CPPv2N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE"></span><span id="arrow::dataset::FileSystemDatasetWriteOptions::basename_template__ss"></span><span class="target" id="structarrow_1_1dataset_1_1_file_system_dataset_write_options_1a9cf0bbd89148c7ea5481c8b0f5c674fd"></span><span class="n"><span class="pre">std</span></span><span class="p"><span class="pre">::</span></span><span class="n"><span class="pre">string</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">basename_template</span></span></span><a class="headerlink" href="#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptions17basename_templateE" title="Permalink to this definition"></a><br /></dt>
<dd><p>Template string used to generate fragment basenames. </p>
<p>{i} will be replaced by an auto-incremented integer. </p>
</dd></dl>
</div>
</dd></dl>
</div>
<div class="section" id="file-formats">
<h3>File Formats<a class="headerlink" href="#file-formats" title="Permalink to this headline"></a></h3>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>doxygengroup: Cannot find group “dataset-file-formats” in doxygen xml output for project “arrow_cpp” from directory: ../../cpp/apidoc/xml</p>
</div>
</div>
</div>
</div>
</div>
<!-- Bottom pager: links to the previous and next pages of the documentation. -->
<div class="prev-next-bottom">
  <a class="left-prev" id="prev-link" href="filesystem.html" title="previous page">Filesystems</a>
  <a class="right-next" id="next-link" href="../../java/index.html" title="next page">Java Implementation</a>
</div>
</main>
</div>
</div>
<script src="../../_static/js/index.1c5a1a01449ed65a7b51.js"></script>
<!-- Matomo -->
<script>
// Reuse the page-wide Matomo command queue if one exists, otherwise start a new one.
var _paq = window._paq = window._paq || [];

// Tracker methods such as "setCustomDimension" must be queued before "trackPageView".
// 'disableCookies' is queued first: cookie tracking is turned off on purpose to avoid privacy issues.
[
  ['disableCookies'],
  ['trackPageView'],
  ['enableLinkTracking']
].forEach(function (command) {
  _paq.push(command);
});

(function () {
  var trackerBase = "https://analytics.apache.org/";

  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Insert the tracker script, flagged async, ahead of the first <script> in the document.
  var loaderTag = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loaderTag.async = true;
  loaderTag.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loaderTag, firstScript);
})();
</script>
<!-- End Matomo Code -->
<!-- Page footer: copyright notice followed by the Sphinx version that generated this page. -->
<footer class="footer mt-5 mt-md-0">
  <div class="container">
    <div class="footer-item">
      <p class="copyright">
        &copy; Copyright 2016-2019 Apache Software Foundation.
      </p>
    </div>
    <div class="footer-item">
      <p class="sphinx-version">
        <!-- Link to the Sphinx site over HTTPS rather than plain HTTP. -->
        Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.0.2.
      </p>
    </div>
  </div>
</footer>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body>
</html>