| |
| <!DOCTYPE html> |
| |
| <html> |
| <head> |
|   <!-- Document metadata (the viewport is declared exactly once) --> |
|   <meta charset="utf-8" /> |
|   <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
|   <title>Tabular Datasets — Apache Arrow v7.0.0</title> |
| |
|   <!-- Theme stylesheets --> |
|   <link href="../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
|   <link href="../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| |
|   <!-- Font Awesome 5.13.0: stylesheet plus preloaded solid and brands webfonts --> |
|   <link rel="stylesheet" |
|     href="../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
|   <link rel="preload" as="font" type="font/woff2" crossorigin |
|     href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
|   <link rel="preload" as="font" type="font/woff2" crossorigin |
|     href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
|   <!-- Additional stylesheets --> |
|   <!-- NOTE(review): pydata-sphinx-theme.css is also linked above with a digest query string; confirm whether both links are needed --> |
|   <link rel="stylesheet" type="text/css" href="../_static/pygments.css" /> |
|   <link rel="stylesheet" type="text/css" href="../_static/styles/pydata-sphinx-theme.css" /> |
|   <link rel="stylesheet" type="text/css" href="../_static/tabs.css" /> |
|   <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" /> |
| |
|   <!-- Scripts: the theme bundle is only preloaded here; the others are loaded and run in order --> |
|   <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"> |
| |
|   <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script> |
|   <script src="../_static/jquery.js"></script> |
|   <script src="../_static/underscore.js"></script> |
|   <script src="../_static/doctools.js"></script> |
| |
|   <!-- Canonical URL, favicon and document relations --> |
|   <link rel="canonical" href="https://arrow.apache.org/docs/cpp/dataset.html" /> |
|   <link rel="shortcut icon" href="../_static/favicon.ico"/> |
|   <link rel="index" title="Index" href="../genindex.html" /> |
|   <link rel="search" title="Search" href="../search.html" /> |
|   <link rel="next" title="Arrow Flight RPC" href="flight.html" /> |
|   <link rel="prev" title="Reading JSON files" href="json.html" /> |
|   <meta name="docsearch:language" content="None"> |
| |
| |
|   <!-- Google Analytics --> |
| |
|   <!-- Matomo --> |
|   <script> |
|     var _paq = window._paq = window._paq || []; |
|     /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
|     /* We explicitly disable cookie tracking to avoid privacy issues */ |
|     _paq.push(['disableCookies']); |
|     _paq.push(['trackPageView']); |
|     _paq.push(['enableLinkTracking']); |
|     /* Queue the tracker endpoint and site id, then insert matomo.js (async) before the first script element */ |
|     (function() { |
|       var u="https://analytics.apache.org/"; |
|       _paq.push(['setTrackerUrl', u+'matomo.php']); |
|       _paq.push(['setSiteId', '20']); |
|       var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
|       g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
|     })(); |
|   </script> |
|   <!-- End Matomo Code --> |
| |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <div class="container-fluid" id="banner"></div> |
| |
| |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| |
| <!-- Only show if we have sidebars configured, else just a small margin --> |
| <div class="col-12 col-md-3 bd-sidebar"> |
| <div class="sidebar-start-items"> |
| <!-- Logo link back to the documentation index; the alt text names the link destination because the image is the link's only content --> |
| <a class="navbar-brand" href="../index.html"> |
|   <img src="../_static/arrow.png" class="logo" alt="Apache Arrow documentation home"> |
| </a> |
| |
| <div id="version-search-wrapper"> |
| |
| <div id="version-button" class="dropdown"> |
|   <!-- Toggle labelled with the current docs version; aria-haspopup/aria-expanded expose the menu and its open state to assistive technology --> |
|   <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> |
|     7.0.0 |
|     <span class="caret"></span> |
|   </button> |
|   <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
|     <!-- dropdown will be populated by javascript on page load --> |
|   </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Function to construct the target URL from the JSON components |
| function buildURL(entry) { |
| var template = "https://arrow.apache.org/docs/{version}"; // supplied by jinja |
| template = template.replace("{version}", entry.version); |
| return template; |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version |
| function checkPageExistsAndRedirect(event) { |
| const currentFilePath = "cpp/dataset.html", |
| otherDocsHomepage = event.target.getAttribute("href"); |
| let tryUrl = `${otherDocsHomepage}${currentFilePath}`; |
| $.ajax({ |
| type: 'HEAD', |
| url: tryUrl, |
| // if the page exists, go there |
| success: function() { |
| location.href = tryUrl; |
| } |
| }).fail(function() { |
| location.href = otherDocsHomepage; |
| }); |
| return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
| // get JSON config |
| $.getJSON("/docs/_static/versions.json", function(data, textStatus, jqXHR) { |
| // create the nodes first (before AJAX calls) to ensure the order is |
| // correct (for now, links will go to doc version homepage) |
| $.each(data, function(index, entry) { |
| // if no custom name specified (e.g., "latest"), use version string |
| if (!("name" in entry)) { |
| entry.name = entry.version; |
| } |
| // construct the appropriate URL, and add it to the dropdown |
| entry.url = buildURL(entry); |
| const node = document.createElement("a"); |
| node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
| node.setAttribute("href", `${entry.url}`); |
| node.textContent = `${entry.name}`; |
| node.onclick = checkPageExistsAndRedirect; |
| $("#version_switcher").append(node); |
| }); |
| }); |
| })(); |
| </script> |
| |
| <form id="search-box" class="bd-search d-flex align-items-center" action="../search.html" method="get" role="search"> |
|   <!-- Decorative icon: hidden from assistive technology because the input carries its own aria-label --> |
|   <i class="icon fas fa-search" aria-hidden="true"></i> |
|   <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off"> |
| </form> |
| |
| </div> |
| |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| <div class="bd-toc-item active"> |
| |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Supported Environments |
| </span> |
| </p> |
| <ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../c_glib/index.html"> |
| C/GLib |
| </a> |
| </li> |
| <li class="toctree-l1 current active has-children"> |
| <a class="reference internal" href="index.html"> |
| C++ |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/> |
| <label for="toctree-checkbox-1"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l2 current active has-children"> |
| <a class="reference internal" href="getting_started.html"> |
| User Guide |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/> |
| <label for="toctree-checkbox-2"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="overview.html"> |
| High-Level Overview |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="conventions.html"> |
| Conventions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="build_system.html"> |
| Using Arrow C++ in your own project |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="memory.html"> |
| Memory Management |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="arrays.html"> |
| Arrays |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="datatypes.html"> |
| Data Types |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tables.html"> |
| Tabular Data |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="streaming_execution.html"> |
| Streaming execution engine |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="io.html"> |
| Input / output and filesystems |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="ipc.html"> |
| Reading and writing the Arrow IPC format |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="parquet.html"> |
| Reading and writing Parquet files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="csv.html"> |
| Reading and Writing CSV files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="json.html"> |
| Reading JSON files |
| </a> |
| </li> |
| <li class="toctree-l3 current active"> |
| <a class="current reference internal" href="#"> |
| Tabular Datasets |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="gdb.html"> |
| Debugging code using Arrow |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="examples/index.html"> |
| Examples |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/> |
| <label for="toctree-checkbox-3"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/cmake_minimal_build.html"> |
| Minimal build using CMake |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/compute_and_write_example.html"> |
| Compute and Write CSV Example |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/dataset_documentation_example.html"> |
| Arrow Datasets example |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/row_columnar_conversion.html"> |
| Row to columnar conversion |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/tuple_range_conversion.html"> |
| std::tuple-like ranges to Arrow |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="api.html"> |
| API Reference |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/> |
| <label for="toctree-checkbox-4"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/support.html"> |
| Programming Support |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/memory.html"> |
| Memory (management) |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/datatype.html"> |
| Data Types |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/array.html"> |
| Arrays |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/scalar.html"> |
| Scalars |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/builder.html"> |
| Array Builders |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/table.html"> |
| Two-dimensional Datasets |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/c_abi.html"> |
| C Interfaces |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/tensor.html"> |
| Tensors |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/utilities.html"> |
| Utilities |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/async.html"> |
| Asynchronous programming |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/io.html"> |
| Input / output |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/ipc.html"> |
| Arrow IPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/formats.html"> |
| File Formats |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/cuda.html"> |
| CUDA support |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/filesystem.html"> |
| Filesystems |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/dataset.html"> |
| Dataset |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md"> |
| C# |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://pkg.go.dev/github.com/apache/arrow/go"> |
| Go |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../java/index.html"> |
| Java |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/> |
| <label for="toctree-checkbox-5"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/vector.html"> |
| ValueVector |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/vector_schema_root.html"> |
| VectorSchemaRoot |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/ipc.html"> |
| Reading/Writing IPC formats |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/algorithm.html"> |
| Java Algorithms |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/dataset.html"> |
| Dataset |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/reference/index.html"> |
| Reference (javadoc) |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../js/index.html"> |
| JavaScript |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/master/julia/Arrow/README.md"> |
| Julia |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md"> |
| MATLAB |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../python/index.html"> |
| Python |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/> |
| <label for="toctree-checkbox-6"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/install.html"> |
| Installing PyArrow |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/getstarted.html"> |
| Getting Started |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/data.html"> |
| Data Types and In-Memory Data Model |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/memory.html"> |
| Memory and IO Interfaces |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/ipc.html"> |
| Streaming, Serialization, and IPC |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/filesystems.html"> |
| Filesystem Interface |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/filesystems_deprecated.html"> |
| Filesystem Interface (legacy) |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/> |
| <label for="toctree-checkbox-7"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.hdfs.connect.html"> |
| pyarrow.hdfs.connect |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.cat.html"> |
| pyarrow.HadoopFileSystem.cat |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chmod.html"> |
| pyarrow.HadoopFileSystem.chmod |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chown.html"> |
| pyarrow.HadoopFileSystem.chown |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.delete.html"> |
| pyarrow.HadoopFileSystem.delete |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.df.html"> |
| pyarrow.HadoopFileSystem.df |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.disk_usage.html"> |
| pyarrow.HadoopFileSystem.disk_usage |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.download.html"> |
| pyarrow.HadoopFileSystem.download |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.exists.html"> |
| pyarrow.HadoopFileSystem.exists |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_capacity.html"> |
| pyarrow.HadoopFileSystem.get_capacity |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_space_used.html"> |
| pyarrow.HadoopFileSystem.get_space_used |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.info.html"> |
| pyarrow.HadoopFileSystem.info |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.ls.html"> |
| pyarrow.HadoopFileSystem.ls |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.mkdir.html"> |
| pyarrow.HadoopFileSystem.mkdir |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.open.html"> |
| pyarrow.HadoopFileSystem.open |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rename.html"> |
| pyarrow.HadoopFileSystem.rename |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rm.html"> |
| pyarrow.HadoopFileSystem.rm |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.upload.html"> |
| pyarrow.HadoopFileSystem.upload |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HdfsFile.html"> |
| pyarrow.HdfsFile |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/plasma.html"> |
| The Plasma In-Memory Object Store |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/numpy.html"> |
| NumPy Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/pandas.html"> |
| Pandas Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/timestamps.html"> |
| Timestamps |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/csv.html"> |
| Reading and Writing CSV files |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/feather.html"> |
| Feather File Format |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/json.html"> |
| Reading JSON files |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/parquet.html"> |
| Reading and Writing the Apache Parquet Format |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/dataset.html"> |
| Tabular Datasets |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/extending_types.html"> |
| Extending pyarrow |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/integration.html"> |
| PyArrow Integrations |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/> |
| <label for="toctree-checkbox-8"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/python_r.html"> |
| Integrating PyArrow with R |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/extending.html"> |
| Using pyarrow from C++ and Cython Code |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/cuda.html"> |
| CUDA Integration |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/api.html"> |
| API Reference |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/> |
| <label for="toctree-checkbox-9"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/datatypes.html"> |
| Data Types and Schemas |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/> |
| <label for="toctree-checkbox-10"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.null.html"> |
| pyarrow.null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.bool_.html"> |
| pyarrow.bool_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int8.html"> |
| pyarrow.int8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int16.html"> |
| pyarrow.int16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int32.html"> |
| pyarrow.int32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int64.html"> |
| pyarrow.int64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint8.html"> |
| pyarrow.uint8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint16.html"> |
| pyarrow.uint16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint32.html"> |
| pyarrow.uint32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint64.html"> |
| pyarrow.uint64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float16.html"> |
| pyarrow.float16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float32.html"> |
| pyarrow.float32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float64.html"> |
| pyarrow.float64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.time32.html"> |
| pyarrow.time32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.time64.html"> |
| pyarrow.time64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.timestamp.html"> |
| pyarrow.timestamp |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.date32.html"> |
| pyarrow.date32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.date64.html"> |
| pyarrow.date64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.duration.html"> |
| pyarrow.duration |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.month_day_nano_interval.html"> |
| pyarrow.month_day_nano_interval |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.binary.html"> |
| pyarrow.binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.string.html"> |
| pyarrow.string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.utf8.html"> |
| pyarrow.utf8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_binary.html"> |
| pyarrow.large_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_string.html"> |
| pyarrow.large_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_utf8.html"> |
| pyarrow.large_utf8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.decimal128.html"> |
| pyarrow.decimal128 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.list_.html"> |
| pyarrow.list_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_list.html"> |
| pyarrow.large_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.map_.html"> |
| pyarrow.map_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.struct.html"> |
| pyarrow.struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dictionary.html"> |
| pyarrow.dictionary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.field.html"> |
| pyarrow.field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.schema.html"> |
| pyarrow.schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.from_numpy_dtype.html"> |
| pyarrow.from_numpy_dtype |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.unify_schemas.html"> |
| pyarrow.unify_schemas |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DataType.html"> |
| pyarrow.DataType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryType.html"> |
| pyarrow.DictionaryType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListType.html"> |
| pyarrow.ListType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapType.html"> |
| pyarrow.MapType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructType.html"> |
| pyarrow.StructType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionType.html"> |
| pyarrow.UnionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampType.html"> |
| pyarrow.TimestampType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Type.html"> |
| pyarrow.Time32Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Type.html"> |
| pyarrow.Time64Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryType.html"> |
| pyarrow.FixedSizeBinaryType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Type.html"> |
| pyarrow.Decimal128Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Field.html"> |
| pyarrow.Field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Schema.html"> |
| pyarrow.Schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ExtensionType.html"> |
| pyarrow.ExtensionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.PyExtensionType.html"> |
| pyarrow.PyExtensionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.register_extension_type.html"> |
| pyarrow.register_extension_type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.unregister_extension_type.html"> |
| pyarrow.unregister_extension_type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_boolean.html"> |
| pyarrow.types.is_boolean |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_integer.html"> |
| pyarrow.types.is_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_signed_integer.html"> |
| pyarrow.types.is_signed_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_unsigned_integer.html"> |
| pyarrow.types.is_unsigned_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int8.html"> |
| pyarrow.types.is_int8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int16.html"> |
| pyarrow.types.is_int16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int32.html"> |
| pyarrow.types.is_int32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int64.html"> |
| pyarrow.types.is_int64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint8.html"> |
| pyarrow.types.is_uint8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint16.html"> |
| pyarrow.types.is_uint16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint32.html"> |
| pyarrow.types.is_uint32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint64.html"> |
| pyarrow.types.is_uint64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_floating.html"> |
| pyarrow.types.is_floating |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float16.html"> |
| pyarrow.types.is_float16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float32.html"> |
| pyarrow.types.is_float32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float64.html"> |
| pyarrow.types.is_float64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_decimal.html"> |
| pyarrow.types.is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_list.html"> |
| pyarrow.types.is_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_list.html"> |
| pyarrow.types.is_large_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_struct.html"> |
| pyarrow.types.is_struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_union.html"> |
| pyarrow.types.is_union |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_nested.html"> |
| pyarrow.types.is_nested |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_temporal.html"> |
| pyarrow.types.is_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_timestamp.html"> |
| pyarrow.types.is_timestamp |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date.html"> |
| pyarrow.types.is_date |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date32.html"> |
| pyarrow.types.is_date32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date64.html"> |
| pyarrow.types.is_date64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time.html"> |
| pyarrow.types.is_time |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time32.html"> |
| pyarrow.types.is_time32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time64.html"> |
| pyarrow.types.is_time64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_null.html"> |
| pyarrow.types.is_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_binary.html"> |
| pyarrow.types.is_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_unicode.html"> |
| pyarrow.types.is_unicode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_string.html"> |
| pyarrow.types.is_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_binary.html"> |
| pyarrow.types.is_large_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_unicode.html"> |
| pyarrow.types.is_large_unicode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_string.html"> |
| pyarrow.types.is_large_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_fixed_size_binary.html"> |
| pyarrow.types.is_fixed_size_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_map.html"> |
| pyarrow.types.is_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_dictionary.html"> |
| pyarrow.types.is_dictionary |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/arrays.html"> |
| Arrays and Scalars |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox"/> |
| <label for="toctree-checkbox-11"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.array.html"> |
| pyarrow.array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.nulls.html"> |
| pyarrow.nulls |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Array.html"> |
| pyarrow.Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BooleanArray.html"> |
| pyarrow.BooleanArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FloatingPointArray.html"> |
| pyarrow.FloatingPointArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.IntegerArray.html"> |
| pyarrow.IntegerArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int8Array.html"> |
| pyarrow.Int8Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int16Array.html"> |
| pyarrow.Int16Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int32Array.html"> |
| pyarrow.Int32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int64Array.html"> |
| pyarrow.Int64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NullArray.html"> |
| pyarrow.NullArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NumericArray.html"> |
| pyarrow.NumericArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt8Array.html"> |
| pyarrow.UInt8Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt16Array.html"> |
| pyarrow.UInt16Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt32Array.html"> |
| pyarrow.UInt32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt64Array.html"> |
| pyarrow.UInt64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BinaryArray.html"> |
| pyarrow.BinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StringArray.html"> |
| pyarrow.StringArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryArray.html"> |
| pyarrow.FixedSizeBinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeBinaryArray.html"> |
| pyarrow.LargeBinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeStringArray.html"> |
| pyarrow.LargeStringArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Array.html"> |
| pyarrow.Time32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Array.html"> |
| pyarrow.Time64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date32Array.html"> |
| pyarrow.Date32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date64Array.html"> |
| pyarrow.Date64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampArray.html"> |
| pyarrow.TimestampArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DurationArray.html"> |
| pyarrow.DurationArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalArray.html"> |
| pyarrow.MonthDayNanoIntervalArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Array.html"> |
| pyarrow.Decimal128Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryArray.html"> |
| pyarrow.DictionaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListArray.html"> |
| pyarrow.ListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeListArray.html"> |
| pyarrow.FixedSizeListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeListArray.html"> |
| pyarrow.LargeListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapArray.html"> |
| pyarrow.MapArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructArray.html"> |
| pyarrow.StructArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionArray.html"> |
| pyarrow.UnionArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ExtensionArray.html"> |
| pyarrow.ExtensionArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.scalar.html"> |
| pyarrow.scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NA.html"> |
| pyarrow.NA |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Scalar.html"> |
| pyarrow.Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BooleanScalar.html"> |
| pyarrow.BooleanScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int8Scalar.html"> |
| pyarrow.Int8Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int16Scalar.html"> |
| pyarrow.Int16Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int32Scalar.html"> |
| pyarrow.Int32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int64Scalar.html"> |
| pyarrow.Int64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt8Scalar.html"> |
| pyarrow.UInt8Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt16Scalar.html"> |
| pyarrow.UInt16Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt32Scalar.html"> |
| pyarrow.UInt32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt64Scalar.html"> |
| pyarrow.UInt64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FloatScalar.html"> |
| pyarrow.FloatScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DoubleScalar.html"> |
| pyarrow.DoubleScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BinaryScalar.html"> |
| pyarrow.BinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StringScalar.html"> |
| pyarrow.StringScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryScalar.html"> |
| pyarrow.FixedSizeBinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeBinaryScalar.html"> |
| pyarrow.LargeBinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeStringScalar.html"> |
| pyarrow.LargeStringScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Scalar.html"> |
| pyarrow.Time32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Scalar.html"> |
| pyarrow.Time64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date32Scalar.html"> |
| pyarrow.Date32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date64Scalar.html"> |
| pyarrow.Date64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampScalar.html"> |
| pyarrow.TimestampScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DurationScalar.html"> |
| pyarrow.DurationScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalScalar.html"> |
| pyarrow.MonthDayNanoIntervalScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Scalar.html"> |
| pyarrow.Decimal128Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryScalar.html"> |
| pyarrow.DictionaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListScalar.html"> |
| pyarrow.ListScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeListScalar.html"> |
| pyarrow.LargeListScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapScalar.html"> |
| pyarrow.MapScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructScalar.html"> |
| pyarrow.StructScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionScalar.html"> |
| pyarrow.UnionScalar |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/memory.html"> |
| Buffers and Memory |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox"/> |
| <label for="toctree-checkbox-12"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.allocate_buffer.html"> |
| pyarrow.allocate_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.py_buffer.html"> |
| pyarrow.py_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.foreign_buffer.html"> |
| pyarrow.foreign_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Buffer.html"> |
| pyarrow.Buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ResizableBuffer.html"> |
| pyarrow.ResizableBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Codec.html"> |
| pyarrow.Codec |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compress.html"> |
| pyarrow.compress |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.decompress.html"> |
| pyarrow.decompress |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MemoryPool.html"> |
| pyarrow.MemoryPool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.default_memory_pool.html"> |
| pyarrow.default_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.jemalloc_memory_pool.html"> |
| pyarrow.jemalloc_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.mimalloc_memory_pool.html"> |
| pyarrow.mimalloc_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.system_memory_pool.html"> |
| pyarrow.system_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.jemalloc_set_decay_ms.html"> |
| pyarrow.jemalloc_set_decay_ms |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.set_memory_pool.html"> |
| pyarrow.set_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.log_memory_allocations.html"> |
| pyarrow.log_memory_allocations |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.total_allocated_bytes.html"> |
| pyarrow.total_allocated_bytes |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/compute.html"> |
| Compute Functions |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox"/> |
| <label for="toctree-checkbox-13"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.all.html"> |
| pyarrow.compute.all |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.any.html"> |
| pyarrow.compute.any |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.approximate_median.html"> |
| pyarrow.compute.approximate_median |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count.html"> |
| pyarrow.compute.count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_distinct.html"> |
| pyarrow.compute.count_distinct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.index.html"> |
| pyarrow.compute.index |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.max.html"> |
| pyarrow.compute.max |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.mean.html"> |
| pyarrow.compute.mean |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min.html"> |
| pyarrow.compute.min |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min_max.html"> |
| pyarrow.compute.min_max |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.mode.html"> |
| pyarrow.compute.mode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.product.html"> |
| pyarrow.compute.product |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quantile.html"> |
| pyarrow.compute.quantile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.stddev.html"> |
| pyarrow.compute.stddev |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sum.html"> |
| pyarrow.compute.sum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tdigest.html"> |
| pyarrow.compute.tdigest |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.variance.html"> |
| pyarrow.compute.variance |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.abs.html"> |
| pyarrow.compute.abs |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.abs_checked.html"> |
| pyarrow.compute.abs_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.add.html"> |
| pyarrow.compute.add |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.add_checked.html"> |
| pyarrow.compute.add_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.divide.html"> |
| pyarrow.compute.divide |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.divide_checked.html"> |
| pyarrow.compute.divide_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.multiply.html"> |
| pyarrow.compute.multiply |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.multiply_checked.html"> |
| pyarrow.compute.multiply_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.negate.html"> |
| pyarrow.compute.negate |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.negate_checked.html"> |
| pyarrow.compute.negate_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.power.html"> |
| pyarrow.compute.power |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.power_checked.html"> |
| pyarrow.compute.power_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sign.html"> |
| pyarrow.compute.sign |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subtract.html"> |
| pyarrow.compute.subtract |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subtract_checked.html"> |
| pyarrow.compute.subtract_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_and.html"> |
| pyarrow.compute.bit_wise_and |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_not.html"> |
| pyarrow.compute.bit_wise_not |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_or.html"> |
| pyarrow.compute.bit_wise_or |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_xor.html"> |
| pyarrow.compute.bit_wise_xor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_left.html"> |
| pyarrow.compute.shift_left |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_left_checked.html"> |
| pyarrow.compute.shift_left_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_right.html"> |
| pyarrow.compute.shift_right |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_right_checked.html"> |
| pyarrow.compute.shift_right_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ceil.html"> |
| pyarrow.compute.ceil |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.floor.html"> |
| pyarrow.compute.floor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round.html"> |
| pyarrow.compute.round |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round_to_multiple.html"> |
| pyarrow.compute.round_to_multiple |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.trunc.html"> |
| pyarrow.compute.trunc |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ln.html"> |
| pyarrow.compute.ln |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ln_checked.html"> |
| pyarrow.compute.ln_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log10.html"> |
| pyarrow.compute.log10 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log10_checked.html"> |
| pyarrow.compute.log10_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log1p.html"> |
| pyarrow.compute.log1p |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log1p_checked.html"> |
| pyarrow.compute.log1p_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log2.html"> |
| pyarrow.compute.log2 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log2_checked.html"> |
| pyarrow.compute.log2_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.logb.html"> |
| pyarrow.compute.logb |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.logb_checked.html"> |
| pyarrow.compute.logb_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.acos.html"> |
| pyarrow.compute.acos |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.acos_checked.html"> |
| pyarrow.compute.acos_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.asin.html"> |
| pyarrow.compute.asin |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.asin_checked.html"> |
| pyarrow.compute.asin_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.atan.html"> |
| pyarrow.compute.atan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.atan2.html"> |
| pyarrow.compute.atan2 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cos.html"> |
| pyarrow.compute.cos |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cos_checked.html"> |
| pyarrow.compute.cos_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sin.html"> |
| pyarrow.compute.sin |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sin_checked.html"> |
| pyarrow.compute.sin_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tan.html"> |
| pyarrow.compute.tan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tan_checked.html"> |
| pyarrow.compute.tan_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.equal.html"> |
| pyarrow.compute.equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.greater.html"> |
| pyarrow.compute.greater |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.greater_equal.html"> |
| pyarrow.compute.greater_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.less.html"> |
| pyarrow.compute.less |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.less_equal.html"> |
| pyarrow.compute.less_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.not_equal.html"> |
| pyarrow.compute.not_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.max_element_wise.html"> |
| pyarrow.compute.max_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min_element_wise.html"> |
| pyarrow.compute.min_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_.html"> |
| pyarrow.compute.and_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_kleene.html"> |
| pyarrow.compute.and_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_not.html"> |
| pyarrow.compute.and_not |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_not_kleene.html"> |
| pyarrow.compute.and_not_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.invert.html"> |
| pyarrow.compute.invert |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.or_.html"> |
| pyarrow.compute.or_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.or_kleene.html"> |
| pyarrow.compute.or_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.xor.html"> |
| pyarrow.compute.xor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alnum.html"> |
| pyarrow.compute.ascii_is_alnum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alpha.html"> |
| pyarrow.compute.ascii_is_alpha |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_decimal.html"> |
| pyarrow.compute.ascii_is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_lower.html"> |
| pyarrow.compute.ascii_is_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_printable.html"> |
| pyarrow.compute.ascii_is_printable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_space.html"> |
| pyarrow.compute.ascii_is_space |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_upper.html"> |
| pyarrow.compute.ascii_is_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alnum.html"> |
| pyarrow.compute.utf8_is_alnum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alpha.html"> |
| pyarrow.compute.utf8_is_alpha |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_decimal.html"> |
| pyarrow.compute.utf8_is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_digit.html"> |
| pyarrow.compute.utf8_is_digit |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_lower.html"> |
| pyarrow.compute.utf8_is_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_numeric.html"> |
| pyarrow.compute.utf8_is_numeric |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_printable.html"> |
| pyarrow.compute.utf8_is_printable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_space.html"> |
| pyarrow.compute.utf8_is_space |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_upper.html"> |
| pyarrow.compute.utf8_is_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_title.html"> |
| pyarrow.compute.ascii_is_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_title.html"> |
| pyarrow.compute.utf8_is_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.string_is_ascii.html"> |
| pyarrow.compute.string_is_ascii |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_capitalize.html"> |
| pyarrow.compute.ascii_capitalize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lower.html"> |
| pyarrow.compute.ascii_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_reverse.html"> |
| pyarrow.compute.ascii_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_swapcase.html"> |
| pyarrow.compute.ascii_swapcase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_title.html"> |
| pyarrow.compute.ascii_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_upper.html"> |
| pyarrow.compute.ascii_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_length.html"> |
| pyarrow.compute.binary_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_repeat.html"> |
| pyarrow.compute.binary_repeat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_replace_slice.html"> |
| pyarrow.compute.binary_replace_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_reverse.html"> |
| pyarrow.compute.binary_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring.html"> |
| pyarrow.compute.replace_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring_regex.html"> |
| pyarrow.compute.replace_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_capitalize.html"> |
| pyarrow.compute.utf8_capitalize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_length.html"> |
| pyarrow.compute.utf8_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lower.html"> |
| pyarrow.compute.utf8_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_replace_slice.html"> |
| pyarrow.compute.utf8_replace_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_reverse.html"> |
| pyarrow.compute.utf8_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_swapcase.html"> |
| pyarrow.compute.utf8_swapcase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_title.html"> |
| pyarrow.compute.utf8_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_upper.html"> |
| pyarrow.compute.utf8_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_center.html"> |
| pyarrow.compute.ascii_center |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lpad.html"> |
| pyarrow.compute.ascii_lpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rpad.html"> |
| pyarrow.compute.ascii_rpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_center.html"> |
| pyarrow.compute.utf8_center |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lpad.html"> |
| pyarrow.compute.utf8_lpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rpad.html"> |
| pyarrow.compute.utf8_rpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim.html"> |
| pyarrow.compute.ascii_ltrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim_whitespace.html"> |
| pyarrow.compute.ascii_ltrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim.html"> |
| pyarrow.compute.ascii_rtrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim_whitespace.html"> |
| pyarrow.compute.ascii_rtrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim.html"> |
| pyarrow.compute.ascii_trim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim_whitespace.html"> |
| pyarrow.compute.ascii_trim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim.html"> |
| pyarrow.compute.utf8_ltrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim_whitespace.html"> |
| pyarrow.compute.utf8_ltrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim.html"> |
| pyarrow.compute.utf8_rtrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim_whitespace.html"> |
| pyarrow.compute.utf8_rtrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim.html"> |
| pyarrow.compute.utf8_trim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim_whitespace.html"> |
| pyarrow.compute.utf8_trim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_split_whitespace.html"> |
| pyarrow.compute.ascii_split_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern.html"> |
| pyarrow.compute.split_pattern |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern_regex.html"> |
| pyarrow.compute.split_pattern_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_split_whitespace.html"> |
| pyarrow.compute.utf8_split_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.extract_regex.html"> |
| pyarrow.compute.extract_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_join.html"> |
| pyarrow.compute.binary_join |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_join_element_wise.html"> |
| pyarrow.compute.binary_join_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_slice_codeunits.html"> |
| pyarrow.compute.utf8_slice_codeunits |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_substring.html"> |
| pyarrow.compute.count_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_substring_regex.html"> |
| pyarrow.compute.count_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ends_with.html"> |
| pyarrow.compute.ends_with |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.find_substring.html"> |
| pyarrow.compute.find_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.find_substring_regex.html"> |
| pyarrow.compute.find_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.index_in.html"> |
| pyarrow.compute.index_in |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_in.html"> |
| pyarrow.compute.is_in |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_like.html"> |
| pyarrow.compute.match_like |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_substring.html"> |
| pyarrow.compute.match_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_substring_regex.html"> |
| pyarrow.compute.match_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.starts_with.html"> |
| pyarrow.compute.starts_with |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.indices_nonzero.html"> |
| pyarrow.compute.indices_nonzero |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_finite.html"> |
| pyarrow.compute.is_finite |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_inf.html"> |
| pyarrow.compute.is_inf |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_nan.html"> |
| pyarrow.compute.is_nan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_null.html"> |
| pyarrow.compute.is_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_valid.html"> |
| pyarrow.compute.is_valid |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.case_when.html"> |
| pyarrow.compute.case_when |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.choose.html"> |
| pyarrow.compute.choose |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.coalesce.html"> |
| pyarrow.compute.coalesce |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.if_else.html"> |
| pyarrow.compute.if_else |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cast.html"> |
| pyarrow.compute.cast |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ceil_temporal.html"> |
| pyarrow.compute.ceil_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.floor_temporal.html"> |
| pyarrow.compute.floor_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round_temporal.html"> |
| pyarrow.compute.round_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.strftime.html"> |
| pyarrow.compute.strftime |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.strptime.html"> |
| pyarrow.compute.strptime |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day.html"> |
| pyarrow.compute.day |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_of_week.html"> |
| pyarrow.compute.day_of_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_of_year.html"> |
| pyarrow.compute.day_of_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.hour.html"> |
| pyarrow.compute.hour |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_week.html"> |
| pyarrow.compute.iso_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_year.html"> |
| pyarrow.compute.iso_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_calendar.html"> |
| pyarrow.compute.iso_calendar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.microsecond.html"> |
| pyarrow.compute.microsecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.millisecond.html"> |
| pyarrow.compute.millisecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.minute.html"> |
| pyarrow.compute.minute |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month.html"> |
| pyarrow.compute.month |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.nanosecond.html"> |
| pyarrow.compute.nanosecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quarter.html"> |
| pyarrow.compute.quarter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.second.html"> |
| pyarrow.compute.second |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subsecond.html"> |
| pyarrow.compute.subsecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.us_week.html"> |
| pyarrow.compute.us_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.week.html"> |
| pyarrow.compute.week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.year.html"> |
| pyarrow.compute.year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.year_month_day.html"> |
| pyarrow.compute.year_month_day |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_time_interval_between.html"> |
| pyarrow.compute.day_time_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.days_between.html"> |
| pyarrow.compute.days_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.hours_between.html"> |
| pyarrow.compute.hours_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.microseconds_between.html"> |
| pyarrow.compute.microseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.milliseconds_between.html"> |
| pyarrow.compute.milliseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.minutes_between.html"> |
| pyarrow.compute.minutes_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month_day_nano_interval_between.html"> |
| pyarrow.compute.month_day_nano_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month_interval_between.html"> |
| pyarrow.compute.month_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.nanoseconds_between.html"> |
| pyarrow.compute.nanoseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quarters_between.html"> |
| pyarrow.compute.quarters_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.seconds_between.html"> |
| pyarrow.compute.seconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.weeks_between.html"> |
| pyarrow.compute.weeks_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.years_between.html"> |
| pyarrow.compute.years_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.assume_timezone.html"> |
| pyarrow.compute.assume_timezone |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.dictionary_encode.html"> |
| pyarrow.compute.dictionary_encode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.unique.html"> |
| pyarrow.compute.unique |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.value_counts.html"> |
| pyarrow.compute.value_counts |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_filter.html"> |
| pyarrow.compute.array_filter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_take.html"> |
| pyarrow.compute.array_take |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.drop_null.html"> |
| pyarrow.compute.drop_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.filter.html"> |
| pyarrow.compute.filter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.take.html"> |
| pyarrow.compute.take |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_sort_indices.html"> |
| pyarrow.compute.array_sort_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.partition_nth_indices.html"> |
| pyarrow.compute.partition_nth_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.select_k_unstable.html"> |
| pyarrow.compute.select_k_unstable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sort_indices.html"> |
| pyarrow.compute.sort_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_backward.html"> |
| pyarrow.compute.fill_null_backward |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_forward.html"> |
| pyarrow.compute.fill_null_forward |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_element.html"> |
| pyarrow.compute.list_element |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_flatten.html"> |
| pyarrow.compute.list_flatten |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_parent_indices.html"> |
| pyarrow.compute.list_parent_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_value_length.html"> |
| pyarrow.compute.list_value_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.make_struct.html"> |
| pyarrow.compute.make_struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_with_mask.html"> |
| pyarrow.compute.replace_with_mask |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.struct_field.html"> |
| pyarrow.compute.struct_field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ArraySortOptions.html"> |
| pyarrow.compute.ArraySortOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.AssumeTimezoneOptions.html"> |
| pyarrow.compute.AssumeTimezoneOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.CastOptions.html"> |
| pyarrow.compute.CastOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.CountOptions.html"> |
| pyarrow.compute.CountOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.DayOfWeekOptions.html"> |
| pyarrow.compute.DayOfWeekOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.DictionaryEncodeOptions.html"> |
| pyarrow.compute.DictionaryEncodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ElementWiseAggregateOptions.html"> |
| pyarrow.compute.ElementWiseAggregateOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ExtractRegexOptions.html"> |
| pyarrow.compute.ExtractRegexOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.FilterOptions.html"> |
| pyarrow.compute.FilterOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.IndexOptions.html"> |
| pyarrow.compute.IndexOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.JoinOptions.html"> |
| pyarrow.compute.JoinOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.MakeStructOptions.html"> |
| pyarrow.compute.MakeStructOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.MatchSubstringOptions.html"> |
| pyarrow.compute.MatchSubstringOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ModeOptions.html"> |
| pyarrow.compute.ModeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.NullOptions.html"> |
| pyarrow.compute.NullOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.PadOptions.html"> |
| pyarrow.compute.PadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.PartitionNthOptions.html"> |
| pyarrow.compute.PartitionNthOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.QuantileOptions.html"> |
| pyarrow.compute.QuantileOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSliceOptions.html"> |
| pyarrow.compute.ReplaceSliceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSubstringOptions.html"> |
| pyarrow.compute.ReplaceSubstringOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundOptions.html"> |
| pyarrow.compute.RoundOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundTemporalOptions.html"> |
| pyarrow.compute.RoundTemporalOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundToMultipleOptions.html"> |
| pyarrow.compute.RoundToMultipleOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ScalarAggregateOptions.html"> |
| pyarrow.compute.ScalarAggregateOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SelectKOptions.html"> |
| pyarrow.compute.SelectKOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SetLookupOptions.html"> |
| pyarrow.compute.SetLookupOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SliceOptions.html"> |
| pyarrow.compute.SliceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SortOptions.html"> |
| pyarrow.compute.SortOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SplitOptions.html"> |
| pyarrow.compute.SplitOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SplitPatternOptions.html"> |
| pyarrow.compute.SplitPatternOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StrftimeOptions.html"> |
| pyarrow.compute.StrftimeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StrptimeOptions.html"> |
| pyarrow.compute.StrptimeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StructFieldOptions.html"> |
| pyarrow.compute.StructFieldOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TakeOptions.html"> |
| pyarrow.compute.TakeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TDigestOptions.html"> |
| pyarrow.compute.TDigestOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TrimOptions.html"> |
| pyarrow.compute.TrimOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.VarianceOptions.html"> |
| pyarrow.compute.VarianceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.WeekOptions.html"> |
| pyarrow.compute.WeekOptions |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/files.html"> |
| Streams and File Access |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox" aria-label="Toggle navigation of Streams and File Access"/> |
| <label for="toctree-checkbox-14"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.input_stream.html"> |
| pyarrow.input_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.output_stream.html"> |
| pyarrow.output_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.memory_map.html"> |
| pyarrow.memory_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.create_memory_map.html"> |
| pyarrow.create_memory_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NativeFile.html"> |
| pyarrow.NativeFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.OSFile.html"> |
| pyarrow.OSFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.PythonFile.html"> |
| pyarrow.PythonFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BufferReader.html"> |
| pyarrow.BufferReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BufferOutputStream.html"> |
| pyarrow.BufferOutputStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBufferWriter.html"> |
| pyarrow.FixedSizeBufferWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MemoryMappedFile.html"> |
| pyarrow.MemoryMappedFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.CompressedInputStream.html"> |
| pyarrow.CompressedInputStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.CompressedOutputStream.html"> |
| pyarrow.CompressedOutputStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.hdfs.connect.html"> |
| pyarrow.hdfs.connect |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LocalFileSystem.html"> |
| pyarrow.LocalFileSystem |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/tables.html"> |
| Tables and Tensors |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox" aria-label="Toggle navigation of Tables and Tensors"/> |
| <label for="toctree-checkbox-15"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.chunked_array.html"> |
| pyarrow.chunked_array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.concat_arrays.html"> |
| pyarrow.concat_arrays |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.concat_tables.html"> |
| pyarrow.concat_tables |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.record_batch.html"> |
| pyarrow.record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.table.html"> |
| pyarrow.table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ChunkedArray.html"> |
| pyarrow.ChunkedArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RecordBatch.html"> |
| pyarrow.RecordBatch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Table.html"> |
| pyarrow.Table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TableGroupBy.html"> |
| pyarrow.TableGroupBy |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Tensor.html"> |
| pyarrow.Tensor |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/ipc.html"> |
| Serialization and IPC |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox" aria-label="Toggle navigation of Serialization and IPC"/> |
| <label for="toctree-checkbox-16"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.new_file.html"> |
| pyarrow.ipc.new_file |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.open_file.html"> |
| pyarrow.ipc.open_file |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.new_stream.html"> |
| pyarrow.ipc.new_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.open_stream.html"> |
| pyarrow.ipc.open_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_message.html"> |
| pyarrow.ipc.read_message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_record_batch.html"> |
| pyarrow.ipc.read_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.get_record_batch_size.html"> |
| pyarrow.ipc.get_record_batch_size |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_tensor.html"> |
| pyarrow.ipc.read_tensor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.write_tensor.html"> |
| pyarrow.ipc.write_tensor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.get_tensor_size.html"> |
| pyarrow.ipc.get_tensor_size |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.IpcWriteOptions.html"> |
| pyarrow.ipc.IpcWriteOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.Message.html"> |
| pyarrow.ipc.Message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.MessageReader.html"> |
| pyarrow.ipc.MessageReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileReader.html"> |
| pyarrow.ipc.RecordBatchFileReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileWriter.html"> |
| pyarrow.ipc.RecordBatchFileWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamReader.html"> |
| pyarrow.ipc.RecordBatchStreamReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamWriter.html"> |
| pyarrow.ipc.RecordBatchStreamWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.serialize.html"> |
| pyarrow.serialize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.serialize_to.html"> |
| pyarrow.serialize_to |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.deserialize.html"> |
| pyarrow.deserialize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.deserialize_components.html"> |
| pyarrow.deserialize_components |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.deserialize_from.html"> |
| pyarrow.deserialize_from |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.read_serialized.html"> |
| pyarrow.read_serialized |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.SerializedPyObject.html"> |
| pyarrow.SerializedPyObject |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.SerializationContext.html"> |
| pyarrow.SerializationContext |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/flight.html"> |
| Arrow Flight |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox" aria-label="Toggle navigation of Arrow Flight"/> |
| <label for="toctree-checkbox-17"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Action.html"> |
| pyarrow.flight.Action |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ActionType.html"> |
| pyarrow.flight.ActionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.DescriptorType.html"> |
| pyarrow.flight.DescriptorType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightDescriptor.html"> |
| pyarrow.flight.FlightDescriptor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightEndpoint.html"> |
| pyarrow.flight.FlightEndpoint |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightInfo.html"> |
| pyarrow.flight.FlightInfo |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Location.html"> |
| pyarrow.flight.Location |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Ticket.html"> |
| pyarrow.flight.Ticket |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Result.html"> |
| pyarrow.flight.Result |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightCallOptions.html"> |
| pyarrow.flight.FlightCallOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightClient.html"> |
| pyarrow.flight.FlightClient |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddlewareFactory.html"> |
| pyarrow.flight.ClientMiddlewareFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddleware.html"> |
| pyarrow.flight.ClientMiddleware |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightServerBase.html"> |
| pyarrow.flight.FlightServerBase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.GeneratorStream.html"> |
| pyarrow.flight.GeneratorStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.RecordBatchStream.html"> |
| pyarrow.flight.RecordBatchStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddlewareFactory.html"> |
| pyarrow.flight.ServerMiddlewareFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddleware.html"> |
| pyarrow.flight.ServerMiddleware |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientAuthHandler.html"> |
| pyarrow.flight.ClientAuthHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerAuthHandler.html"> |
| pyarrow.flight.ServerAuthHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightMethod.html"> |
| pyarrow.flight.FlightMethod |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.CallInfo.html"> |
| pyarrow.flight.CallInfo |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/formats.html"> |
| Tabular File Formats |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-18" name="toctree-checkbox-18" type="checkbox" aria-label="Toggle navigation of Tabular File Formats"/> |
| <label for="toctree-checkbox-18"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ConvertOptions.html"> |
| pyarrow.csv.ConvertOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.CSVStreamingReader.html"> |
| pyarrow.csv.CSVStreamingReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.CSVWriter.html"> |
| pyarrow.csv.CSVWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ISO8601.html"> |
| pyarrow.csv.ISO8601 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ParseOptions.html"> |
| pyarrow.csv.ParseOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ReadOptions.html"> |
| pyarrow.csv.ReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.WriteOptions.html"> |
| pyarrow.csv.WriteOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.open_csv.html"> |
| pyarrow.csv.open_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.read_csv.html"> |
| pyarrow.csv.read_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.write_csv.html"> |
| pyarrow.csv.write_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.InvalidRow.html"> |
| pyarrow.csv.InvalidRow |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.read_feather.html"> |
| pyarrow.feather.read_feather |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.read_table.html"> |
| pyarrow.feather.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.write_feather.html"> |
| pyarrow.feather.write_feather |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.ReadOptions.html"> |
| pyarrow.json.ReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.ParseOptions.html"> |
| pyarrow.json.ParseOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.read_json.html"> |
| pyarrow.json.read_json |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetDataset.html"> |
| pyarrow.parquet.ParquetDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetFile.html"> |
| pyarrow.parquet.ParquetFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetWriter.html"> |
| pyarrow.parquet.ParquetWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_table.html"> |
| pyarrow.parquet.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_metadata.html"> |
| pyarrow.parquet.read_metadata |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_pandas.html"> |
| pyarrow.parquet.read_pandas |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_schema.html"> |
| pyarrow.parquet.read_schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_metadata.html"> |
| pyarrow.parquet.write_metadata |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_table.html"> |
| pyarrow.parquet.write_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_to_dataset.html"> |
| pyarrow.parquet.write_to_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.ORCFile.html"> |
| pyarrow.orc.ORCFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.ORCWriter.html"> |
| pyarrow.orc.ORCWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.read_table.html"> |
| pyarrow.orc.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.write_table.html"> |
| pyarrow.orc.write_table |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/filesystems.html"> |
| Filesystems |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-19" name="toctree-checkbox-19" type="checkbox" aria-label="Toggle navigation of Filesystems"/> |
| <label for="toctree-checkbox-19"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileInfo.html"> |
| pyarrow.fs.FileInfo |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSelector.html"> |
| pyarrow.fs.FileSelector |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSystem.html"> |
| pyarrow.fs.FileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.LocalFileSystem.html"> |
| pyarrow.fs.LocalFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.S3FileSystem.html"> |
| pyarrow.fs.S3FileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.HadoopFileSystem.html"> |
| pyarrow.fs.HadoopFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.SubTreeFileSystem.html"> |
| pyarrow.fs.SubTreeFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.PyFileSystem.html"> |
| pyarrow.fs.PyFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSystemHandler.html"> |
| pyarrow.fs.FileSystemHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FSSpecHandler.html"> |
| pyarrow.fs.FSSpecHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.copy_files.html"> |
| pyarrow.fs.copy_files |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.initialize_s3.html"> |
| pyarrow.fs.initialize_s3 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.finalize_s3.html"> |
| pyarrow.fs.finalize_s3 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.resolve_s3_region.html"> |
| pyarrow.fs.resolve_s3_region |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.S3LogLevel.html"> |
| pyarrow.fs.S3LogLevel |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/dataset.html"> |
| Dataset |
| </a> |
| <!-- Icon-only label: aria-label supplies the checkbox's accessible name. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-20" name="toctree-checkbox-20" type="checkbox" aria-label="Toggle navigation of Dataset"/> |
| <label for="toctree-checkbox-20"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.dataset.html"> |
| pyarrow.dataset.dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.parquet_dataset.html"> |
| pyarrow.dataset.parquet_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.partitioning.html"> |
| pyarrow.dataset.partitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.field.html"> |
| pyarrow.dataset.field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.scalar.html"> |
| pyarrow.dataset.scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.write_dataset.html"> |
| pyarrow.dataset.write_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileFormat.html"> |
| pyarrow.dataset.FileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFileFormat.html"> |
| pyarrow.dataset.CsvFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFragmentScanOptions.html"> |
| pyarrow.dataset.CsvFragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.IpcFileFormat.html"> |
| pyarrow.dataset.IpcFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFileFormat.html"> |
| pyarrow.dataset.ParquetFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetReadOptions.html"> |
| pyarrow.dataset.ParquetReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFragmentScanOptions.html"> |
| pyarrow.dataset.ParquetFragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Partitioning.html"> |
| pyarrow.dataset.Partitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.PartitioningFactory.html"> |
| pyarrow.dataset.PartitioningFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.DirectoryPartitioning.html"> |
| pyarrow.dataset.DirectoryPartitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.HivePartitioning.html"> |
| pyarrow.dataset.HivePartitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Dataset.html"> |
| pyarrow.dataset.Dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDataset.html"> |
| pyarrow.dataset.FileSystemDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemFactoryOptions.html"> |
| pyarrow.dataset.FileSystemFactoryOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDatasetFactory.html"> |
| pyarrow.dataset.FileSystemDatasetFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.UnionDataset.html"> |
| pyarrow.dataset.UnionDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Fragment.html"> |
| pyarrow.dataset.Fragment |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FragmentScanOptions.html"> |
| pyarrow.dataset.FragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Scanner.html"> |
| pyarrow.dataset.Scanner |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Expression.html"> |
| pyarrow.dataset.Expression |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/plasma.html"> |
| Plasma In-Memory Object Store |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-21" name="toctree-checkbox-21" type="checkbox"/> |
| <label for="toctree-checkbox-21"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.plasma.ObjectID.html"> |
| pyarrow.plasma.ObjectID |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.plasma.PlasmaClient.html"> |
| pyarrow.plasma.PlasmaClient |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.plasma.PlasmaBuffer.html"> |
| pyarrow.plasma.PlasmaBuffer |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/cuda.html"> |
| CUDA Integration |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-22" name="toctree-checkbox-22" type="checkbox"/> |
| <label for="toctree-checkbox-22"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.Context.html"> |
| pyarrow.cuda.Context |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.CudaBuffer.html"> |
| pyarrow.cuda.CudaBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.new_host_buffer.html"> |
| pyarrow.cuda.new_host_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.HostBuffer.html"> |
| pyarrow.cuda.HostBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.BufferReader.html"> |
| pyarrow.cuda.BufferReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.BufferWriter.html"> |
| pyarrow.cuda.BufferWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.serialize_record_batch.html"> |
| pyarrow.cuda.serialize_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.read_record_batch.html"> |
| pyarrow.cuda.read_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.read_message.html"> |
| pyarrow.cuda.read_message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.IpcMemHandle.html"> |
| pyarrow.cuda.IpcMemHandle |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/misc.html"> |
| Miscellaneous |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-23" name="toctree-checkbox-23" type="checkbox"/> |
| <label for="toctree-checkbox-23"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cpu_count.html"> |
| pyarrow.cpu_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.set_cpu_count.html"> |
| pyarrow.set_cpu_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_include.html"> |
| pyarrow.get_include |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_libraries.html"> |
| pyarrow.get_libraries |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_library_dirs.html"> |
| pyarrow.get_library_dirs |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/getting_involved.html"> |
| Getting Involved |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/benchmarks.html"> |
| Benchmarks |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../r/index.html"> |
| R |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md"> |
| Ruby |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://docs.rs/crate/arrow/"> |
| Rust |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../status.html"> |
| Implementation Status |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Cookbooks |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/cpp/"> |
| C++ |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/py/"> |
| Python |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/r/"> |
| R |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Specifications and Protocols |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Versioning.html"> |
| Format Versioning and Stability |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Columnar.html"> |
| Arrow Columnar Format |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Integration.html"> |
| Integration Testing |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/CDataInterface.html"> |
| The Arrow C data interface |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/CStreamInterface.html"> |
| The Arrow C stream interface |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Other.html"> |
| Other Data Structures |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Development |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/contributing.html"> |
| Contributing to Apache Arrow |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/guide/index.html"> |
| New Contributor’s Guide |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-24" name="toctree-checkbox-24" type="checkbox"/> |
| <label for="toctree-checkbox-24"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/architectural_overview.html"> |
| Architectural Overview |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/communication.html"> |
| Communication |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../developers/guide/step_by_step/index.html"> |
| Steps in making your first PR |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-25" name="toctree-checkbox-25" type="checkbox"/> |
| <label for="toctree-checkbox-25"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/set_up.html"> |
| Set up |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/building.html"> |
| Building the Arrow libraries 🏋🏿‍♀️ |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/finding_issues.html"> |
| Finding good first issues 🔎 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/arrow_codebase.html"> |
| Working on the Arrow codebase 🧐 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/testing.html"> |
| Testing 🧪 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/pr_and_github.html"> |
| Lifecycle of a pull request |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/documentation.html"> |
| Helping with documentation |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../developers/guide/tutorials/index.html"> |
| Tutorials |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-26" name="toctree-checkbox-26" type="checkbox"/> |
| <label for="toctree-checkbox-26"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/tutorials/python_tutorial.html"> |
| Python tutorial |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/tutorials/r_tutorial.html"> |
| R tutorial |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/resources.html"> |
| Additional information and resources |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/reviewing.html"> |
| Reviewing contributions |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/cpp/index.html"> |
| C++ Development |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-27" name="toctree-checkbox-27" type="checkbox"/> |
| <label for="toctree-checkbox-27"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/building.html"> |
| Building Arrow C++ |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/development.html"> |
| Development Guidelines |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/windows.html"> |
| Developing on Windows |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/conventions.html"> |
| Conventions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/fuzzing.html"> |
| Fuzzing Arrow C++ |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/python.html"> |
| Python Development |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/continuous_integration/index.html"> |
| Continuous Integration |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-28" name="toctree-checkbox-28" type="checkbox"/> |
| <label for="toctree-checkbox-28"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/overview.html"> |
| Continuous Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/docker.html"> |
| Running Docker Builds |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/archery.html"> |
| Daily Development using Archery |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/crossbow.html"> |
| Packaging and Testing with Crossbow |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/benchmarks.html"> |
| Benchmarks |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/documentation.html"> |
| Building the Documentation |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/computeir.html"> |
| Arrow Compute IR (Intermediate Representation) |
| </a> |
| </li> |
| </ul> |
| |
| |
| </div> |
| </nav> |
| </div> |
| <div class="sidebar-end-items"> |
| </div> |
| </div> |
| |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| |
| <div class="toc-item"> |
| |
| <div class="tocsection onthispage pt-5 pb-3"> |
| <i class="fas fa-list"></i> On this page |
| </div> |
| |
| <nav id="bd-toc-nav"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-datasets"> |
| Reading Datasets |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#dataset-discovery"> |
| Dataset discovery |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-different-file-formats"> |
| Reading different file formats |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#customizing-file-formats"> |
| Customizing file formats |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#filtering-data"> |
| Filtering data |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#projecting-columns"> |
| Projecting columns |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-and-writing-partitioned-data"> |
| Reading and writing partitioned data |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#different-partitioning-schemes"> |
| Different partitioning schemes |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#partitioning-performance-considerations"> |
| Partitioning performance considerations |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-from-other-data-sources"> |
| Reading from other data sources |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-in-memory-data"> |
| Reading in-memory data |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#reading-from-cloud-storage"> |
| Reading from cloud storage |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#a-note-on-transactions-acid-guarantees"> |
| A note on transactions &amp; ACID guarantees |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#full-example"> |
| Full Example |
| </a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| |
| <div class="toc-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/master/docs/source/cpp/dataset.rst"> |
| <i class="fas fa-pencil-alt"></i> Edit this page |
| </a> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <div class="section" id="tabular-datasets"> |
| <h1>Tabular Datasets<a class="headerlink" href="#tabular-datasets" title="Permalink to this headline">¶</a></h1> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p><a class="reference internal" href="api/dataset.html"><span class="doc">Dataset API reference</span></a></p> |
| </div> |
| <div class="admonition warning"> |
| <p class="admonition-title">Warning</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">arrow::dataset</span></code> namespace is experimental, and a stable API |
| is not yet guaranteed.</p> |
| </div> |
| <p>The Arrow Datasets library provides functionality to efficiently work with |
| tabular, potentially larger-than-memory, and multi-file datasets. This includes:</p> |
| <ul class="simple"> |
| <li><p>A unified interface that supports different sources and file formats |
| (currently, Parquet, ORC, Feather / Arrow IPC, and CSV files) and different |
| file systems (local, cloud).</p></li> |
| <li><p>Discovery of sources (crawling directories, handling partitioned datasets with |
| various partitioning schemes, basic schema normalization, …)</p></li> |
| <li><p>Optimized reading with predicate pushdown (filtering rows), projection |
| (selecting and deriving columns), and optionally parallel reading.</p></li> |
| </ul> |
| <p>The goal is to expand support to other file formats and data sources |
| (e.g. database connections) in the future.</p> |
| <div class="section" id="reading-datasets"> |
| <span id="cpp-dataset-reading"></span><h2>Reading Datasets<a class="headerlink" href="#reading-datasets" title="Permalink to this headline">¶</a></h2> |
| <p>For the examples below, let’s create a small dataset consisting |
| of a directory with two Parquet files:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">52 </span><span class="c1">// Generate some data for the rest of this example.</span> |
| <span class="lineno">53 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">CreateTable</span><span class="p">()</span> <span class="p">{</span> |
| <span class="lineno">54 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> |
| <span class="lineno">55 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="lineno">56 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"c"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">())});</span> |
| <span class="lineno">57 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_a</span><span class="p">;</span> |
| <span class="lineno">58 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_b</span><span class="p">;</span> |
| <span class="lineno">59 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_c</span><span class="p">;</span> |
| <span class="lineno">60 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span> <span class="n">builder</span><span class="p">;</span> |
| <span class="lineno">61 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span> |
| <span class="lineno">62 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_a</span><span class="p">));</span> |
| <span class="lineno">63 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">64 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span> |
| <span class="lineno">65 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_b</span><span class="p">));</span> |
| <span class="lineno">66 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">67 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span> |
| <span class="lineno">68 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_c</span><span class="p">));</span> |
| <span class="lineno">69 </span> <span class="k">return</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">{</span><span class="n">array_a</span><span class="p">,</span> <span class="n">array_b</span><span class="p">,</span> <span class="n">array_c</span><span class="p">});</span> |
| <span class="lineno">70 </span><span class="p">}</span> |
| <span class="lineno">71 </span> |
| <span class="lineno">72 </span><span class="c1">// Set up a dataset by writing two Parquet files.</span> |
| <span class="lineno">73 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">74 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">75 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="lineno">76 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno">77 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno">78 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span> |
| <span class="lineno">79 </span> <span class="c1">// Write it into two Parquet files</span> |
| <span class="lineno">80 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data1.parquet"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">81 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span> |
| <span class="lineno">82 </span> <span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span> |
| <span class="lineno">83 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data2.parquet"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">84 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span> |
| <span class="lineno">85 </span> <span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span> |
| <span class="lineno">86 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">87 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>(See the full example at the bottom: <a class="reference internal" href="#full-example"><span class="std std-ref">Full Example</span></a>.)</p> |
| <div class="section" id="dataset-discovery"> |
| <h3>Dataset discovery<a class="headerlink" href="#dataset-discovery" title="Permalink to this headline">¶</a></h3> |
| <p>A <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> object can be created using the various |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::DatasetFactory</span></code></a> objects. Here, we’ll use the |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE" title="arrow::dataset::FileSystemDatasetFactory"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetFactory</span></code></a>, which can create a dataset |
| given a base directory path:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span> |
| <span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ScanWholeDataset</span><span class="p">(</span> |
| <span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span> |
| <span class="lineno">164 </span><span class="hll"> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| </span><span class="lineno">165 </span><span class="hll"> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| </span><span class="lineno">166 </span><span class="hll"> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| </span><span class="lineno">167 </span><span class="hll"> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| </span><span class="lineno">168 </span><span class="hll"> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">169 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">170 </span> <span class="c1">// Print out the fragments</span> |
| <span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Found fragment: "</span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">173 </span> <span class="p">}</span> |
| <span class="lineno">174 </span> <span class="c1">// Read the entire dataset as a Table</span> |
| <span class="lineno">175 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">176 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">177 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">178 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>We’re also passing the filesystem to use and the file format to use for reading. |
| This lets us choose between (for example) reading local files or files in Amazon |
| S3, or between Parquet and CSV.</p> |
| <p>In addition to searching a base directory, we can list file paths manually.</p> |
| <p>Creating a <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> does not begin reading the data |
| itself. It only crawls the directory to find all the files (if needed), which can |
| be retrieved with <a class="reference internal" href="api/dataset.html#_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv" title="arrow::dataset::FileSystemDataset::files"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDataset::files()</span></code></a>:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// Print out the files crawled (only for FileSystemDataset)</span> |
| <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">filename</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">files</span><span class="p">())</span> <span class="p">{</span> |
| <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="n">filename</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>…and infers the dataset’s schema (by default from the first file):</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="n">dataset</span><span class="o">-></span><span class="n">schema</span><span class="p">()</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| </pre></div> |
| </div> |
| <p>Using the <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7Dataset7NewScanEv" title="arrow::dataset::Dataset::NewScan"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::Dataset::NewScan()</span></code></a> method, we can build a |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Scanner</span></code></a> and read the dataset (or a portion of it) into |
| a <a class="reference internal" href="api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Table</span></code></a> with the <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7Scanner7ToTableEv" title="arrow::dataset::Scanner::ToTable"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::Scanner::ToTable()</span></code></a> |
| method:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span> |
| <span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ScanWholeDataset</span><span class="p">(</span> |
| <span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span> |
| <span class="lineno">164 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">165 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">166 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">167 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">168 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">169 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">170 </span> <span class="c1">// Print out the fragments</span> |
| <span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Found fragment: "</span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">173 </span> <span class="p">}</span> |
| <span class="lineno">174 </span><span class="hll"> <span class="c1">// Read the entire dataset as a Table</span> |
| </span><span class="lineno">175 </span><span class="hll"> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">176 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">177 </span><span class="hll"> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">178 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Depending on the size of your dataset, this can require a lot of |
| memory; see <a class="reference internal" href="#cpp-dataset-filtering-data"><span class="std std-ref">Filtering data</span></a> below for how to |
| filter rows and select columns so that less data is read.</p> |
| </div> |
| </div> |
| <div class="section" id="reading-different-file-formats"> |
| <h3>Reading different file formats<a class="headerlink" href="#reading-different-file-formats" title="Permalink to this headline">¶</a></h3> |
| <p>The above examples use Parquet files on local disk, but the Dataset API |
| provides a consistent interface across multiple file formats and filesystems. |
| (See <a class="reference internal" href="#cpp-dataset-cloud-storage"><span class="std std-ref">Reading from cloud storage</span></a> for more information on the latter.) |
| Currently, Parquet, ORC, Feather / Arrow IPC, and CSV file formats are |
| supported; more formats are planned in the future.</p> |
| <p>If we save the table as Feather files instead of Parquet files:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno"> 91 </span><span class="c1">// Set up a dataset by writing two Feather files.</span> |
| <span class="lineno"> 92 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno"> 93 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno"> 94 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/feather_dataset"</span><span class="p">;</span> |
| <span class="lineno"> 95 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno"> 96 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno"> 97 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span> |
| <span class="lineno"> 98 </span> <span class="c1">// Write it into two Feather files</span> |
| <span class="lineno"> 99 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data1.feather"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">100 </span> <span class="k">auto</span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-></span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">101 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)));</span> |
| <span class="lineno">102 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">Close</span><span class="p">());</span> |
| <span class="lineno">103 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data2.feather"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">104 </span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-></span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">105 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">)));</span> |
| <span class="lineno">106 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">Close</span><span class="p">());</span> |
| <span class="lineno">107 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">108 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>…then we can read the Feather files by passing an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset13IpcFileFormatE" title="arrow::dataset::IpcFileFormat"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::IpcFileFormat</span></code></a>:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">IpcFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="c1">// ...</span> |
| <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span> |
| <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| </div> |
| <div class="section" id="customizing-file-formats"> |
| <h3>Customizing file formats<a class="headerlink" href="#customizing-file-formats" title="Permalink to this headline">¶</a></h3> |
| <p><a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileFormat</span></code></a> objects have properties that control how |
| files are read. For example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="n">format</span><span class="o">-></span><span class="n">reader_options</span><span class="p">.</span><span class="n">dict_columns</span><span class="p">.</span><span class="n">insert</span><span class="p">(</span><span class="s">"a"</span><span class="p">);</span> |
| </pre></div> |
| </div> |
| <p>This will configure column <code class="docutils literal notranslate"><span class="pre">"a"</span></code> to be dictionary-encoded when read. Similarly, |
| setting <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset13CsvFileFormat13parse_optionsE" title="arrow::dataset::CsvFileFormat::parse_options"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">arrow::dataset::CsvFileFormat::parse_options</span></code></a> lets us change |
| things such as whether the data is read as comma-separated or tab-separated.</p> |
| <p>Additionally, passing an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FragmentScanOptions</span></code></a> to |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="arrow::dataset::ScannerBuilder::FragmentScanOptions"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::FragmentScanOptions()</span></code></a> offers fine-grained |
| control over data scanning. For example, for CSV files, we can change what values |
| are converted into Boolean true and false at scan time.</p> |
| </div> |
| </div> |
| <div class="section" id="filtering-data"> |
| <span id="cpp-dataset-filtering-data"></span><h2>Filtering data<a class="headerlink" href="#filtering-data" title="Permalink to this headline">¶</a></h2> |
| <p>So far, we’ve been reading the entire dataset, but if we need only a subset of the |
| data, this can waste time or memory reading data we don’t need. The |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Scanner</span></code></a> offers control over what data to read.</p> |
| <p>In this snippet, we use <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="arrow::dataset::ScannerBuilder::Project"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Project()</span></code></a> to select |
| which columns to read:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column "b" and only rows where b < 4.</span> |
| <span class="lineno">183 </span><span class="c1">//</span> |
| <span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns from a dataset. Where possible,</span> |
| <span class="lineno">185 </span><span class="c1">// Datasets will push down the column selection such that less work is done.</span> |
| <span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span> |
| <span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span> |
| <span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">197 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">({</span><span class="s">"b"</span><span class="p">}));</span> |
| </span><span class="lineno">198 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span> |
| <span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">201 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Some formats, such as Parquet, can reduce I/O costs here by reading only the |
| specified columns from the filesystem.</p> |
| <p>A filter can be provided with <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE" title="arrow::dataset::ScannerBuilder::Filter"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Filter()</span></code></a>, so |
| that rows which do not match the filter predicate will not be included in the |
| returned table. Again, some formats, such as Parquet, can use this filter to |
| reduce the amount of I/O needed.</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column "b" and only rows where b < 4.</span> |
| <span class="lineno">183 </span><span class="c1">//</span> |
| <span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns from a dataset. Where possible,</span> |
| <span class="lineno">185 </span><span class="c1">// Datasets will push down the column selection such that less work is done.</span> |
| <span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span> |
| <span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span> |
| <span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">197 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">({</span><span class="s">"b"</span><span class="p">}));</span> |
| <span class="lineno">198 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span> |
| </span><span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">201 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </div> |
| <div class="section" id="projecting-columns"> |
| <h2>Projecting columns<a class="headerlink" href="#projecting-columns" title="Permalink to this headline">¶</a></h2> |
| <p>In addition to selecting columns, <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="arrow::dataset::ScannerBuilder::Project"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Project()</span></code></a> |
| can also be used for more complex projections, such as renaming columns, casting |
| them to other types, and even deriving new columns based on evaluating |
| expressions.</p> |
| <p>In this case, we pass a vector of expressions used to construct column values |
| and a vector of names for the columns:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">205 </span><span class="c1">// Read a dataset, but with column projection.</span> |
| <span class="lineno">206 </span><span class="c1">//</span> |
| <span class="lineno">207 </span><span class="c1">// This is useful to derive new columns from existing data. For example, here we</span> |
| <span class="lineno">208 </span><span class="c1">// demonstrate casting a column to a different type, and turning a numeric column into a</span> |
| <span class="lineno">209 </span><span class="c1">// boolean column based on a predicate. You could also rename columns or perform</span> |
| <span class="lineno">210 </span><span class="c1">// computations involving multiple columns.</span> |
| <span class="lineno">211 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ProjectDataset</span><span class="p">(</span> |
| <span class="lineno">212 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">213 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">214 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">215 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">216 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">217 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">218 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">219 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">220 </span> <span class="c1">// Project columns using expressions (no row filter is applied)</span> |
| <span class="lineno">221 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">222 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">(</span> |
| </span><span class="lineno">223 </span><span class="hll"> <span class="p">{</span> |
| </span><span class="lineno">224 </span><span class="hll"> <span class="c1">// Pass column "a" through unchanged (output as "a_renamed").</span> |
| </span><span class="lineno">225 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span> |
| </span><span class="lineno">226 </span><span class="hll"> <span class="c1">// Cast column "b" to float32.</span> |
| </span><span class="lineno">227 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">"cast"</span><span class="p">,</span> <span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">)},</span> |
| </span><span class="lineno">228 </span><span class="hll"> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="o">::</span><span class="n">CastOptions</span><span class="o">::</span><span class="n">Safe</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">float32</span><span class="p">())),</span> |
| </span><span class="lineno">229 </span><span class="hll"> <span class="c1">// Derive a boolean column from "c".</span> |
| </span><span class="lineno">230 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"c"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)),</span> |
| </span><span class="lineno">231 </span><span class="hll"> <span class="p">},</span> |
| </span><span class="lineno">232 </span><span class="hll"> <span class="p">{</span><span class="s">"a_renamed"</span><span class="p">,</span> <span class="s">"b_as_float32"</span><span class="p">,</span> <span class="s">"c_1"</span><span class="p">}));</span> |
| </span><span class="lineno">233 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">234 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">235 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>This also determines the column selection; only the given columns will be |
| present in the resulting table. If you want to include a derived column in |
| <em>addition</em> to the existing columns, you can build up the expressions from the |
| dataset schema:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">239 </span><span class="c1">// Read a dataset, but with column projection.</span> |
| <span class="lineno">240 </span><span class="c1">//</span> |
| <span class="lineno">241 </span><span class="c1">// This time, we read all original columns plus one derived column. This simply combines</span> |
| <span class="lineno">242 </span><span class="c1">// the previous two examples: selecting a subset of columns by name, and deriving new</span> |
| <span class="lineno">243 </span><span class="c1">// columns with an expression.</span> |
| <span class="lineno">244 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span> |
| <span class="lineno">245 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">246 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">247 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">248 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">249 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">250 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">251 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">252 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">253 </span> <span class="c1">// Project all original columns plus one derived column (no row filter)</span> |
| <span class="lineno">254 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">255 </span><span class="hll"> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">></span> <span class="n">names</span><span class="p">;</span> |
| </span><span class="lineno">256 </span><span class="hll"> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="o">></span> <span class="n">exprs</span><span class="p">;</span> |
| </span><span class="lineno">257 </span><span class="hll"> <span class="c1">// Read all the original columns.</span> |
| </span><span class="lineno">258 </span><span class="hll"> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">field</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">schema</span><span class="p">()</span><span class="o">-></span><span class="n">fields</span><span class="p">())</span> <span class="p">{</span> |
| </span><span class="lineno">259 </span><span class="hll"> <span class="n">names</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">field</span><span class="o">-></span><span class="n">name</span><span class="p">());</span> |
| </span><span class="lineno">260 </span><span class="hll"> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="n">field</span><span class="o">-></span><span class="n">name</span><span class="p">()));</span> |
| </span><span class="lineno">261 </span><span class="hll"> <span class="p">}</span> |
| </span><span class="lineno">262 </span><span class="hll"> <span class="c1">// Also derive a new column.</span> |
| </span><span class="lineno">263 </span><span class="hll"> <span class="n">names</span><span class="p">.</span><span class="n">emplace_back</span><span class="p">(</span><span class="s">"b_large"</span><span class="p">);</span> |
| </span><span class="lineno">264 </span><span class="hll"> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)));</span> |
| </span><span class="lineno">265 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">(</span><span class="n">exprs</span><span class="p">,</span> <span class="n">names</span><span class="p">));</span> |
| </span><span class="lineno">266 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">267 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">268 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>When combining filters and projections, Arrow will determine all |
| necessary columns to read. For instance, if you filter on a column that |
| isn’t ultimately selected, Arrow will still read the column to evaluate |
| the filter.</p> |
| </div> |
| </div> |
| <div class="section" id="reading-and-writing-partitioned-data"> |
| <h2>Reading and writing partitioned data<a class="headerlink" href="#reading-and-writing-partitioned-data" title="Permalink to this headline">¶</a></h2> |
| <p>So far, we’ve been working with datasets consisting of flat directories with |
| files. Oftentimes, a dataset will have one or more columns that are frequently |
| filtered on. Rather than reading all of the data and then filtering it, we can |
| organize the files into a nested directory structure to define a partitioned dataset, |
| where sub-directory names hold information about which subset of the data is |
| stored in that directory. Then, we can more efficiently filter data by using that |
| information to avoid loading files that don’t match the filter.</p> |
| <p>For example, a dataset partitioned by year and month may have the following layout:</p> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>dataset_name/ |
| year=2007/ |
| month=01/ |
| data0.parquet |
| data1.parquet |
| ... |
| month=02/ |
| data0.parquet |
| data1.parquet |
| ... |
| month=03/ |
| ... |
| year=2008/ |
| month=01/ |
| ... |
| ... |
| </pre></div> |
| </div> |
| <p>The above partitioning scheme uses “/key=value/” directory names, as found in |
| Apache Hive. Under this convention, the file at |
| <code class="docutils literal notranslate"><span class="pre">dataset_name/year=2007/month=01/data0.parquet</span></code> contains only data for which |
| <code class="docutils literal notranslate"><span class="pre">year</span> <span class="pre">==</span> <span class="pre">2007</span></code> and <code class="docutils literal notranslate"><span class="pre">month</span> <span class="pre">==</span> <span class="pre">01</span></code>.</p> |
| <p>Let’s create a small partitioned dataset. For this, we’ll use Arrow’s dataset |
| writing functionality.</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">112 </span><span class="c1">// Set up a dataset by writing files with partitioning</span> |
| <span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span> |
| <span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"c"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">>></span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span> |
| <span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span> <span class="n">builder</span><span class="p">;</span> |
| <span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span> |
| <span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span> |
| <span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span> |
| <span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span> |
| <span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span> |
| <span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span> |
| <span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span> |
| <span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span> |
| <span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">}));</span> |
| <span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span> |
| <span class="lineno">135 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span> |
| <span class="lineno">136 </span><span class="hll"> <span class="c1">// Write it using Datasets</span> |
| </span><span class="lineno">137 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">></span><span class="p">(</span><span class="n">table</span><span class="p">);</span> |
| </span><span class="lineno">138 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">139 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">140 </span><span class="hll"> |
| </span><span class="lineno">141 </span><span class="hll"> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span> |
| </span><span class="lineno">142 </span><span class="hll"> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| </span><span class="lineno">143 </span><span class="hll"> <span class="c1">// We'll use Hive-style partitioning, which creates directories with "key=value" pairs.</span> |
| </span><span class="lineno">144 </span><span class="hll"> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span> |
| </span><span class="lineno">145 </span><span class="hll"> <span class="c1">// We'll write Parquet files.</span> |
| </span><span class="lineno">146 </span><span class="hll"> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| </span><span class="lineno">147 </span><span class="hll"> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span> |
| </span><span class="lineno">148 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-></span><span class="n">DefaultWriteOptions</span><span class="p">();</span> |
| </span><span class="lineno">149 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span> |
| </span><span class="lineno">150 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span> |
| </span><span class="lineno">151 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span> |
| </span><span class="lineno">152 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">"part{i}.parquet"</span><span class="p">;</span> |
| </span><span class="lineno">153 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span> |
| </span><span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">155 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The above created a directory with two subdirectories (“part=a” and “part=b”), |
| and the Parquet files written in those directories no longer include the “part” |
| column.</p> |
| <p>Reading this dataset, we now specify that the dataset should use a Hive-like |
| partitioning scheme:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">272 </span><span class="c1">// Read an entire dataset, but with partitioning information.</span> |
| <span class="lineno">273 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">274 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">275 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">276 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">277 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">278 </span><span class="hll"> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> <span class="c1">// Make sure to search subdirectories</span> |
| </span><span class="lineno">279 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span> |
| <span class="lineno">280 </span><span class="hll"> <span class="c1">// We'll use Hive-style partitioning. We'll let Arrow Datasets infer the partition</span> |
| </span><span class="lineno">281 </span><span class="hll"> <span class="c1">// schema.</span> |
| </span><span class="lineno">282 </span><span class="hll"> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span> |
| </span><span class="lineno">283 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span> |
| <span class="lineno">284 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">285 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">286 </span> <span class="c1">// Print out the fragments</span> |
| <span class="lineno">287 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">288 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Found fragment: "</span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">289 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Partition expression: "</span> |
| <span class="lineno">290 </span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">partition_expression</span><span class="p">().</span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">291 </span> <span class="p">}</span> |
| <span class="lineno">292 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">293 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">294 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">295 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Although the partition fields are not included in the actual Parquet files, |
| they will be added back to the resulting table when scanning this dataset:</p> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>$ ./debug/dataset_documentation_example file:///tmp parquet_hive partitioned |
| Found fragment: /tmp/parquet_dataset/part=a/part0.parquet |
| Partition expression: (part == "a") |
| Found fragment: /tmp/parquet_dataset/part=b/part1.parquet |
| Partition expression: (part == "b") |
| Read 20 rows |
| a: int64 |
| -- field metadata -- |
| PARQUET:field_id: '1' |
| b: double |
| -- field metadata -- |
| PARQUET:field_id: '2' |
| c: int64 |
| -- field metadata -- |
| PARQUET:field_id: '3' |
| part: string |
| ---- |
| # snip... |
| </pre></div> |
| </div> |
| <p>We can now filter on the partition keys, which avoids loading files |
| altogether if they do not match the filter:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">299 </span><span class="c1">// Read an entire dataset, but with partitioning information. Also, filter the dataset on</span> |
| <span class="lineno">300 </span><span class="c1">// the partition values.</span> |
| <span class="lineno">301 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">302 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">303 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">304 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">305 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">306 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> |
| <span class="lineno">307 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span> |
| <span class="lineno">308 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span> |
| <span class="lineno">309 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span> |
| <span class="lineno">310 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">311 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">312 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">313 </span><span class="hll"> <span class="c1">// Filter based on the partition values. This will mean that we won't even read the</span> |
| </span><span class="lineno">314 </span><span class="hll"> <span class="c1">// files whose partition expressions don't match the filter.</span> |
| </span><span class="lineno">315 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span> |
| </span><span class="lineno">316 </span><span class="hll"> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"part"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="s">"b"</span><span class="p">))));</span> |
| </span><span class="lineno">317 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">318 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">319 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <div class="section" id="different-partitioning-schemes"> |
| <h3>Different partitioning schemes<a class="headerlink" href="#different-partitioning-schemes" title="Permalink to this headline">¶</a></h3> |
| <p>The above example uses a Hive-like directory scheme, such as “/year=2009/month=11/day=15”. |
| We specified this by passing the Hive partitioning factory. In this case, the types of |
| the partition keys are inferred from the file paths.</p> |
| <p>It is also possible to directly construct the partitioning and explicitly define |
| the schema of the partition keys. For example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">part</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span> |
| <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"year"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int16</span><span class="p">()),</span> |
| <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"month"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">()),</span> |
| <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"day"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())</span> |
| <span class="p">}));</span> |
| </pre></div> |
| </div> |
| <p>Arrow supports another partitioning scheme, “directory partitioning”, where the |
| segments in the file path represent the values of the partition keys without |
| including the name (the field names are implicit in the segment’s index). For |
| example, given field names “year”, “month”, and “day”, one path might be |
| “/2019/11/15”.</p> |
| <p>Since the names are not included in the file paths, these must be specified |
| when constructing a directory partitioning:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">DirectoryPartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">({</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">,</span> <span class="s">"day"</span><span class="p">});</span> |
| </pre></div> |
| </div> |
| <p>Directory partitioning also supports providing a full schema rather than inferring |
| types from file paths.</p> |
| </div> |
| <div class="section" id="partitioning-performance-considerations"> |
| <h3>Partitioning performance considerations<a class="headerlink" href="#partitioning-performance-considerations" title="Permalink to this headline">¶</a></h3> |
| <p>Partitioning datasets has two aspects that affect performance: it increases the number of |
| files and it creates a directory structure around the files. Both of these have benefits |
| as well as costs. Depending on the configuration and the size of your dataset, the costs |
| can outweigh the benefits.</p> |
| <p>Because partitions split up the dataset into multiple files, partitioned datasets can be |
| read and written with parallelism. However, each additional file adds a little overhead in |
| processing for filesystem interaction. It also increases the overall dataset size since |
| each file has some shared metadata. For example, each parquet file contains the schema and |
| group-level statistics. The number of partitions is a floor for the number of files. If |
| you partition a dataset by date with a year of data, you will have at least 365 files. If |
| you further partition by another dimension with 1,000 unique values, you will have up to |
| 365,000 files. This fine-grained partitioning often leads to small files that mostly consist of |
| metadata.</p> |
| <p>Partitioned datasets create nested folder structures, and those allow us to prune which |
| files are loaded in a scan. However, this adds overhead to discovering files in the dataset, |
| as we’ll need to recursively “list directory” to find the data files. Overly fine |
| partitions can cause problems here: Partitioning a dataset by date for a year’s worth |
| of data will require 365 list calls to find all the files; adding another column with |
| cardinality 1,000 will make that 365,365 calls.</p> |
| <p>The optimal partitioning layout will depend on your data, access patterns, and which |
| systems will be reading the data. Most systems, including Arrow, should work across a |
| range of file sizes and partitioning layouts, but there are extremes you should avoid. These |
| guidelines can help avoid some known worst cases:</p> |
| <ul class="simple"> |
| <li><p>Avoid files smaller than 20MB or larger than 2GB.</p></li> |
| <li><p>Avoid partitioning layouts with more than 10,000 distinct partitions.</p></li> |
| </ul> |
| <p>For file formats that have a notion of groups within a file, such as Parquet, similar |
| guidelines apply. Row groups can provide parallelism when reading and allow data skipping |
| based on statistics, but very small groups can cause metadata to be a significant portion |
| of file size. Arrow’s file writer provides sensible defaults for group sizing in most cases.</p> |
| </div> |
| </div> |
| <div class="section" id="reading-from-other-data-sources"> |
| <h2>Reading from other data sources<a class="headerlink" href="#reading-from-other-data-sources" title="Permalink to this headline">¶</a></h2> |
| <div class="section" id="reading-in-memory-data"> |
| <h3>Reading in-memory data<a class="headerlink" href="#reading-in-memory-data" title="Permalink to this headline">¶</a></h3> |
| <p>If you already have data in memory that you’d like to use with the Datasets API |
| (e.g. to filter/project data, or to write it out to a filesystem), you can wrap it |
| in an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset15InMemoryDatasetE" title="arrow::dataset::InMemoryDataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::InMemoryDataset</span></code></a>:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatches</span><span class="p">(...);</span> |
| <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">></span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table</span><span class="p">));</span> |
| <span class="c1">// Scan the dataset, filter it, etc.</span> |
| <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| <p>In the example, we used the InMemoryDataset to write our example data to local |
| disk, which was used in the rest of the example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">112 </span><span class="c1">// Set up a dataset by writing files with partitioning</span> |
| <span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span> |
| <span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"c"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">>></span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span> |
| <span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span> <span class="n">builder</span><span class="p">;</span> |
| <span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span> |
| <span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span> |
| <span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span> |
| <span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span> |
| <span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span> |
| <span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span> |
| <span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span> |
| <span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span> |
| <span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">}));</span> |
| <span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span> |
| <span class="lineno">135 </span><span class="hll"> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span> |
| </span><span class="lineno">136 </span><span class="hll"> <span class="c1">// Write it using Datasets</span> |
| </span><span class="lineno">137 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">></span><span class="p">(</span><span class="n">table</span><span class="p">);</span> |
| </span><span class="lineno">138 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">139 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| </span><span class="lineno">140 </span> |
| <span class="lineno">141 </span> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span> |
| <span class="lineno">142 </span> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="lineno">143 </span> <span class="c1">// We'll use Hive-style partitioning, which creates directories with "key=value" pairs.</span> |
| <span class="lineno">144 </span> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span> |
| <span class="lineno">145 </span> <span class="c1">// We'll write Parquet files.</span> |
| <span class="lineno">146 </span> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="lineno">147 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span> |
| <span class="lineno">148 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-></span><span class="n">DefaultWriteOptions</span><span class="p">();</span> |
| <span class="lineno">149 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span> |
| <span class="lineno">150 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">151 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span> |
| <span class="lineno">152 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">"part{i}.parquet"</span><span class="p">;</span> |
| <span class="lineno">153 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span> |
| <span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">155 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </div> |
| <div class="section" id="reading-from-cloud-storage"> |
| <span id="cpp-dataset-cloud-storage"></span><h3>Reading from cloud storage<a class="headerlink" href="#reading-from-cloud-storage" title="Permalink to this headline">¶</a></h3> |
| <p>In addition to local files, Arrow Datasets also support reading from cloud |
| storage systems, such as Amazon S3, by passing a different filesystem.</p> |
| <p>See the <a class="reference internal" href="io.html#cpp-filesystems"><span class="std std-ref">filesystem</span></a> docs for more details on the available |
| filesystems.</p> |
| </div> |
| </div> |
| <div class="section" id="a-note-on-transactions-acid-guarantees"> |
| <span id="cpp-dataset-full-example"></span><h2>A note on transactions & ACID guarantees<a class="headerlink" href="#a-note-on-transactions-acid-guarantees" title="Permalink to this headline">¶</a></h2> |
| <p>The dataset API offers no transaction support or any ACID guarantees. This affects |
| both reading and writing. Concurrent reads are fine. Concurrent writes or writes |
| concurrent with reads may have unexpected behavior. Various approaches can be used |
| to avoid operating on the same files, such as using a unique basename template for |
| each writer, a temporary directory for new files, or separate storage of the file |
| list instead of relying on directory discovery.</p> |
| <p>Unexpectedly killing the process while a write is in progress can leave the system |
| in an inconsistent state. Write calls generally return as soon as the bytes to be |
| written have been completely delivered to the OS page cache. Even though a write |
| operation has been completed, it is possible for part of the file to be lost if |
| there is a sudden power loss immediately after the write call.</p> |
| <p>Most file formats have magic numbers which are written at the end. This means a |
| partial file write can safely be detected and discarded. The CSV file format does |
| not have any such concept and a partially written CSV file may be detected as valid.</p> |
| </div> |
| <div class="section" id="full-example"> |
| <h2>Full Example<a class="headerlink" href="#full-example" title="Permalink to this headline">¶</a></h2> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno"> 1 </span><span class="c1">// Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="lineno"> 2 </span><span class="c1">// or more contributor license agreements. See the NOTICE file</span> |
| <span class="lineno"> 3 </span><span class="c1">// distributed with this work for additional information</span> |
| <span class="lineno"> 4 </span><span class="c1">// regarding copyright ownership. The ASF licenses this file</span> |
| <span class="lineno"> 5 </span><span class="c1">// to you under the Apache License, Version 2.0 (the</span> |
| <span class="lineno"> 6 </span><span class="c1">// "License"); you may not use this file except in compliance</span> |
| <span class="lineno"> 7 </span><span class="c1">// with the License. You may obtain a copy of the License at</span> |
| <span class="lineno"> 8 </span><span class="c1">//</span> |
| <span class="lineno"> 9 </span><span class="c1">// http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="lineno"> 10 </span><span class="c1">//</span> |
| <span class="lineno"> 11 </span><span class="c1">// Unless required by applicable law or agreed to in writing,</span> |
| <span class="lineno"> 12 </span><span class="c1">// software distributed under the License is distributed on an</span> |
| <span class="lineno"> 13 </span><span class="c1">// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span> |
| <span class="lineno"> 14 </span><span class="c1">// KIND, either express or implied. See the License for the</span> |
| <span class="lineno"> 15 </span><span class="c1">// specific language governing permissions and limitations</span> |
| <span class="lineno"> 16 </span><span class="c1">// under the License.</span> |
| <span class="lineno"> 17 </span> |
| <span class="lineno"> 18 </span><span class="c1">// This example showcases various ways to work with Datasets. It's</span> |
| <span class="lineno"> 19 </span><span class="c1">// intended to be paired with the documentation.</span> |
| <span class="lineno"> 20 </span> |
| <span class="lineno"> 21 </span><span class="cp">#include</span> <span class="cpf"><arrow/api.h></span><span class="cp"></span> |
| <span class="lineno"> 22 </span><span class="cp">#include</span> <span class="cpf"><arrow/compute/cast.h></span><span class="cp"></span> |
| <span class="lineno"> 23 </span><span class="cp">#include</span> <span class="cpf"><arrow/compute/exec/expression.h></span><span class="cp"></span> |
| <span class="lineno"> 24 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/dataset.h></span><span class="cp"></span> |
| <span class="lineno"> 25 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/discovery.h></span><span class="cp"></span> |
| <span class="lineno"> 26 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/file_base.h></span><span class="cp"></span> |
| <span class="lineno"> 27 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/file_ipc.h></span><span class="cp"></span> |
| <span class="lineno"> 28 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/file_parquet.h></span><span class="cp"></span> |
| <span class="lineno"> 29 </span><span class="cp">#include</span> <span class="cpf"><arrow/dataset/scanner.h></span><span class="cp"></span> |
| <span class="lineno"> 30 </span><span class="cp">#include</span> <span class="cpf"><arrow/filesystem/filesystem.h></span><span class="cp"></span> |
| <span class="lineno"> 31 </span><span class="cp">#include</span> <span class="cpf"><arrow/ipc/writer.h></span><span class="cp"></span> |
| <span class="lineno"> 32 </span><span class="cp">#include</span> <span class="cpf"><arrow/util/iterator.h></span><span class="cp"></span> |
| <span class="lineno"> 33 </span><span class="cp">#include</span> <span class="cpf"><parquet/arrow/writer.h></span><span class="cp"></span> |
| <span class="lineno"> 34 </span> |
| <span class="lineno"> 35 </span><span class="cp">#include</span> <span class="cpf"><iostream></span><span class="cp"></span> |
| <span class="lineno"> 36 </span><span class="cp">#include</span> <span class="cpf"><vector></span><span class="cp"></span> |
| <span class="lineno"> 37 </span> |
| <span class="lineno"> 38 </span><span class="k">namespace</span> <span class="n">ds</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="p">;</span> |
| <span class="lineno"> 39 </span><span class="k">namespace</span> <span class="n">fs</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="p">;</span> |
| <span class="lineno"> 40 </span><span class="k">namespace</span> <span class="n">cp</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="p">;</span> |
| <span class="lineno"> 41 </span> |
| <span class="lineno"> 42 </span><span class="cp">#define ABORT_ON_FAILURE(expr) \</span> |
| <span class="lineno"> 43 </span><span class="cp"> do { \</span> |
| <span class="lineno"> 44 </span><span class="cp"> arrow::Status status_ = (expr); \</span> |
| <span class="lineno"> 45 </span><span class="cp"> if (!status_.ok()) { \</span> |
| <span class="lineno"> 46 </span><span class="cp"> std::cerr << status_.message() << std::endl; \</span> |
| <span class="lineno"> 47 </span><span class="cp"> abort(); \</span> |
| <span class="lineno"> 48 </span><span class="cp"> } \</span> |
| <span class="lineno"> 49 </span><span class="cp"> } while (0);</span> |
| <span class="lineno"> 50 </span> |
| <span class="lineno"> 51 </span><span class="c1">// (Doc section: Reading Datasets)</span> |
| <span class="lineno"> 52 </span><span class="c1">// Generate some data for the rest of this example.</span> |
| <span class="lineno"> 53 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">CreateTable</span><span class="p">()</span> <span class="p">{</span> |
| <span class="lineno"> 54 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> |
| <span class="lineno"> 55 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="lineno"> 56 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"c"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">())});</span> |
| <span class="lineno"> 57 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_a</span><span class="p">;</span> |
| <span class="lineno"> 58 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_b</span><span class="p">;</span> |
| <span class="lineno"> 59 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">></span> <span class="n">array_c</span><span class="p">;</span> |
| <span class="lineno"> 60 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span> <span class="n">builder</span><span class="p">;</span> |
| <span class="lineno"> 61 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span> |
| <span class="lineno"> 62 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_a</span><span class="p">));</span> |
| <span class="lineno"> 63 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno"> 64 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span> |
| <span class="lineno"> 65 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_b</span><span class="p">));</span> |
| <span class="lineno"> 66 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno"> 67 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span> |
| <span class="lineno"> 68 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">array_c</span><span class="p">));</span> |
| <span class="lineno"> 69 </span> <span class="k">return</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">{</span><span class="n">array_a</span><span class="p">,</span> <span class="n">array_b</span><span class="p">,</span> <span class="n">array_c</span><span class="p">});</span> |
| <span class="lineno"> 70 </span><span class="p">}</span> |
| <span class="lineno"> 71 </span> |
| <span class="lineno"> 72 </span><span class="c1">// Set up a dataset by writing two Parquet files.</span> |
| <span class="lineno"> 73 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno"> 74 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno"> 75 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="lineno"> 76 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno"> 77 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno"> 78 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span> |
| <span class="lineno"> 79 </span> <span class="c1">// Write it into two Parquet files</span> |
| <span class="lineno"> 80 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data1.parquet"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno"> 81 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span> |
| <span class="lineno"> 82 </span> <span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span> |
| <span class="lineno"> 83 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data2.parquet"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno"> 84 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span> |
| <span class="lineno"> 85 </span> <span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span> |
| <span class="lineno"> 86 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno"> 87 </span><span class="p">}</span> |
| <span class="lineno"> 88 </span><span class="c1">// (Doc section: Reading Datasets)</span> |
| <span class="lineno"> 89 </span> |
| <span class="lineno"> 90 </span><span class="c1">// (Doc section: Reading different file formats)</span> |
| <span class="lineno"> 91 </span><span class="c1">// Set up a dataset by writing two Feather files.</span> |
| <span class="lineno"> 92 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno"> 93 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno"> 94 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/feather_dataset"</span><span class="p">;</span> |
| <span class="lineno"> 95 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno"> 96 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno"> 97 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span> |
| <span class="lineno"> 98 </span> <span class="c1">// Write it into two Feather files</span> |
| <span class="lineno"> 99 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data1.feather"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">100 </span> <span class="k">auto</span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-></span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">101 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)));</span> |
| <span class="lineno">102 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">Close</span><span class="p">());</span> |
| <span class="lineno">103 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-></span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">"/data2.feather"</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">104 </span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-></span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">105 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-></span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">)));</span> |
| <span class="lineno">106 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-></span><span class="n">Close</span><span class="p">());</span> |
| <span class="lineno">107 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">108 </span><span class="p">}</span> |
| <span class="lineno">109 </span><span class="c1">// (Doc section: Reading different file formats)</span> |
| <span class="lineno">110 </span> |
| <span class="lineno">111 </span><span class="c1">// (Doc section: Reading and writing partitioned data)</span> |
| <span class="lineno">112 </span><span class="c1">// Set up a dataset by writing files with partitioning</span> |
| <span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span> |
| <span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span> |
| <span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"c"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">>></span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span> |
| <span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span> <span class="n">builder</span><span class="p">;</span> |
| <span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span> |
| <span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span> |
| <span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span> |
| <span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span> |
| <span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span> |
| <span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span> |
| <span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span> |
| <span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span> |
| <span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span> |
| <span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"a"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">,</span> <span class="s">"b"</span><span class="p">}));</span> |
| <span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span> |
| <span class="lineno">135 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span> |
| <span class="lineno">136 </span> <span class="c1">// Write it using Datasets</span> |
| <span class="lineno">137 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">></span><span class="p">(</span><span class="n">table</span><span class="p">);</span> |
| <span class="lineno">138 </span> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">139 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">140 </span> |
| <span class="lineno">141 </span> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span> |
| <span class="lineno">142 </span> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"part"</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="lineno">143 </span> <span class="c1">// We'll use Hive-style partitioning, which creates directories with "key=value" pairs.</span> |
| <span class="lineno">144 </span> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span> |
| <span class="lineno">145 </span> <span class="c1">// We'll write Parquet files.</span> |
| <span class="lineno">146 </span> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="lineno">147 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span> |
| <span class="lineno">148 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-></span><span class="n">DefaultWriteOptions</span><span class="p">();</span> |
| <span class="lineno">149 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span> |
| <span class="lineno">150 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">151 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span> |
| <span class="lineno">152 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">"part{i}.parquet"</span><span class="p">;</span> |
| <span class="lineno">153 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span> |
| <span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">155 </span><span class="p">}</span> |
| <span class="lineno">156 </span><span class="c1">// (Doc section: Reading and writing partitioned data)</span> |
| <span class="lineno">157 </span> |
| <span class="lineno">158 </span><span class="c1">// (Doc section: Dataset discovery)</span> |
| <span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span> |
| <span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ScanWholeDataset</span><span class="p">(</span> |
| <span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span> |
| <span class="lineno">164 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">165 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">166 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">167 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">168 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">169 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">170 </span> <span class="c1">// Print out the fragments</span> |
| <span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Found fragment: "</span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">173 </span> <span class="p">}</span> |
| <span class="lineno">174 </span> <span class="c1">// Read the entire dataset as a Table</span> |
| <span class="lineno">175 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">176 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">177 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">178 </span><span class="p">}</span> |
| <span class="lineno">179 </span><span class="c1">// (Doc section: Dataset discovery)</span> |
| <span class="lineno">180 </span> |
| <span class="lineno">181 </span><span class="c1">// (Doc section: Filtering data)</span> |
| <span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column "b" and only rows where b < 4.</span> |
| <span class="lineno">183 </span><span class="c1">//</span> |
| <span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns from a dataset. Where possible,</span> |
| <span class="lineno">185 </span><span class="c1">// Datasets will push down the column selection such that less work is done.</span> |
| <span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span> |
| <span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span> |
| <span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">197 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">({</span><span class="s">"b"</span><span class="p">}));</span> |
| <span class="lineno">198 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span> |
| <span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">201 </span><span class="p">}</span> |
| <span class="lineno">202 </span><span class="c1">// (Doc section: Filtering data)</span> |
| <span class="lineno">203 </span> |
| <span class="lineno">204 </span><span class="c1">// (Doc section: Projecting columns)</span> |
| <span class="lineno">205 </span><span class="c1">// Read a dataset, but with column projection.</span> |
| <span class="lineno">206 </span><span class="c1">//</span> |
| <span class="lineno">207 </span><span class="c1">// This is useful to derive new columns from existing data. For example, here we</span> |
| <span class="lineno">208 </span><span class="c1">// demonstrate casting a column to a different type, and turning a numeric column into a</span> |
| <span class="lineno">209 </span><span class="c1">// boolean column based on a predicate. You could also rename columns or perform</span> |
| <span class="lineno">210 </span><span class="c1">// computations involving multiple columns.</span> |
| <span class="lineno">211 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ProjectDataset</span><span class="p">(</span> |
| <span class="lineno">212 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">213 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">214 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">215 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">216 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">217 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">218 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">219 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">220 </span> <span class="c1">// Read columns through projection expressions (no row filter is applied)</span> |
| <span class="lineno">221 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">222 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">(</span> |
| <span class="lineno">223 </span> <span class="p">{</span> |
| <span class="lineno">224 </span> <span class="c1">// Pass column "a" through unchanged (it is only renamed in the output).</span> |
| <span class="lineno">225 </span> <span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span> |
| <span class="lineno">226 </span> <span class="c1">// Cast column "b" to float32.</span> |
| <span class="lineno">227 </span> <span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">"cast"</span><span class="p">,</span> <span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">)},</span> |
| <span class="lineno">228 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="o">::</span><span class="n">CastOptions</span><span class="o">::</span><span class="n">Safe</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">float32</span><span class="p">())),</span> |
| <span class="lineno">229 </span> <span class="c1">// Derive a boolean column from "c".</span> |
| <span class="lineno">230 </span> <span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"c"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)),</span> |
| <span class="lineno">231 </span> <span class="p">},</span> |
| <span class="lineno">232 </span> <span class="p">{</span><span class="s">"a_renamed"</span><span class="p">,</span> <span class="s">"b_as_float32"</span><span class="p">,</span> <span class="s">"c_1"</span><span class="p">}));</span> |
| <span class="lineno">233 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">234 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">235 </span><span class="p">}</span> |
| <span class="lineno">236 </span><span class="c1">// (Doc section: Projecting columns)</span> |
| <span class="lineno">237 </span> |
| <span class="lineno">238 </span><span class="c1">// (Doc section: Projecting columns #2)</span> |
| <span class="lineno">239 </span><span class="c1">// Read a dataset, but with column projection.</span> |
| <span class="lineno">240 </span><span class="c1">//</span> |
| <span class="lineno">241 </span><span class="c1">// This time, we read all original columns plus one derived column. This simply combines</span> |
| <span class="lineno">242 </span><span class="c1">// the previous two examples: selecting a subset of columns by name, and deriving new</span> |
| <span class="lineno">243 </span><span class="c1">// columns with an expression.</span> |
| <span class="lineno">244 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span> |
| <span class="lineno">245 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">246 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">247 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">248 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">249 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> |
| <span class="lineno">250 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span> |
| <span class="lineno">251 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">252 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">253 </span> <span class="c1">// Build a projection of all original columns plus one derived column</span> |
| <span class="lineno">254 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">255 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">></span> <span class="n">names</span><span class="p">;</span> |
| <span class="lineno">256 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="o">></span> <span class="n">exprs</span><span class="p">;</span> |
| <span class="lineno">257 </span> <span class="c1">// Read all the original columns.</span> |
| <span class="lineno">258 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">field</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">schema</span><span class="p">()</span><span class="o">-></span><span class="n">fields</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">259 </span> <span class="n">names</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">field</span><span class="o">-></span><span class="n">name</span><span class="p">());</span> |
| <span class="lineno">260 </span> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="n">field</span><span class="o">-></span><span class="n">name</span><span class="p">()));</span> |
| <span class="lineno">261 </span> <span class="p">}</span> |
| <span class="lineno">262 </span> <span class="c1">// Also derive a new column.</span> |
| <span class="lineno">263 </span> <span class="n">names</span><span class="p">.</span><span class="n">emplace_back</span><span class="p">(</span><span class="s">"b_large"</span><span class="p">);</span> |
| <span class="lineno">264 </span> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"b"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)));</span> |
| <span class="lineno">265 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-></span><span class="n">Project</span><span class="p">(</span><span class="n">exprs</span><span class="p">,</span> <span class="n">names</span><span class="p">));</span> |
| <span class="lineno">266 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">267 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">268 </span><span class="p">}</span> |
| <span class="lineno">269 </span><span class="c1">// (Doc section: Projecting columns #2)</span> |
| <span class="lineno">270 </span> |
| <span class="lineno">271 </span><span class="c1">// (Doc section: Reading and writing partitioned data #2)</span> |
| <span class="lineno">272 </span><span class="c1">// Read an entire dataset, but with partitioning information.</span> |
| <span class="lineno">273 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">274 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">275 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">276 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">277 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">278 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> <span class="c1">// Make sure to search subdirectories</span> |
| <span class="lineno">279 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span> |
| <span class="lineno">280 </span> <span class="c1">// We'll use Hive-style partitioning. We'll let Arrow Datasets infer the partition</span> |
| <span class="lineno">281 </span> <span class="c1">// schema.</span> |
| <span class="lineno">282 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span> |
| <span class="lineno">283 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span> |
| <span class="lineno">284 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">285 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">286 </span> <span class="c1">// Print out the fragments</span> |
| <span class="lineno">287 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-></span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span> |
| <span class="lineno">288 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Found fragment: "</span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">289 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Partition expression: "</span> |
| <span class="lineno">290 </span> <span class="o"><<</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-></span><span class="n">partition_expression</span><span class="p">().</span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">291 </span> <span class="p">}</span> |
| <span class="lineno">292 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">293 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">294 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">295 </span><span class="p">}</span> |
| <span class="lineno">296 </span><span class="c1">// (Doc section: Reading and writing partitioned data #2)</span> |
| <span class="lineno">297 </span> |
| <span class="lineno">298 </span><span class="c1">// (Doc section: Reading and writing partitioned data #3)</span> |
| <span class="lineno">299 </span><span class="c1">// Read an entire dataset, but with partitioning information. Also, filter the dataset on</span> |
| <span class="lineno">300 </span><span class="c1">// the partition values.</span> |
| <span class="lineno">301 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span> |
| <span class="lineno">302 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">>&</span> <span class="n">filesystem</span><span class="p">,</span> |
| <span class="lineno">303 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">>&</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">304 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span> |
| <span class="lineno">305 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span> |
| <span class="lineno">306 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> |
| <span class="lineno">307 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span> |
| <span class="lineno">308 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span> |
| <span class="lineno">309 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span> |
| <span class="lineno">310 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">311 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">312 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-></span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">313 </span> <span class="c1">// Filter based on the partition values. This will mean that we won't even read the</span> |
| <span class="lineno">314 </span> <span class="c1">// files whose partition expressions don't match the filter.</span> |
| <span class="lineno">315 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span> |
| <span class="lineno">316 </span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"part"</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="s">"b"</span><span class="p">))));</span> |
| <span class="lineno">317 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-></span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">318 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-></span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">319 </span><span class="p">}</span> |
| <span class="lineno">320 </span><span class="c1">// (Doc section: Reading and writing partitioned data #3)</span> |
| <span class="lineno">321 </span> |
| <span class="lineno">322 </span><span class="kt">int</span> <span class="n">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span><span class="o">**</span> <span class="n">argv</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">323 </span> <span class="k">if</span> <span class="p">(</span><span class="n">argc</span> <span class="o"><</span> <span class="mi">3</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">324 </span> <span class="c1">// Fake success for CI purposes.</span> |
| <span class="lineno">325 </span> <span class="k">return</span> <span class="n">EXIT_SUCCESS</span><span class="p">;</span> |
| <span class="lineno">326 </span> <span class="p">}</span> |
| <span class="lineno">327 </span> |
| <span class="lineno">328 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">uri</span> <span class="o">=</span> <span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span> |
| <span class="lineno">329 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">format_name</span> <span class="o">=</span> <span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">];</span> |
| <span class="lineno">330 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">mode</span> <span class="o">=</span> <span class="n">argc</span> <span class="o">></span> <span class="mi">3</span> <span class="o">?</span> <span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">:</span> <span class="s">"no_filter"</span><span class="p">;</span> |
| <span class="lineno">331 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">root_path</span><span class="p">;</span> |
| <span class="lineno">332 </span> <span class="k">auto</span> <span class="n">fs</span> <span class="o">=</span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span> <span class="o">&</span><span class="n">root_path</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span> |
| <span class="lineno">333 </span> |
| <span class="lineno">334 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">base_path</span><span class="p">;</span> |
| <span class="lineno">335 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">></span> <span class="n">format</span><span class="p">;</span> |
| <span class="lineno">336 </span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">"feather"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">337 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">IpcFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="lineno">338 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span> |
| <span class="lineno">339 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">"parquet"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">340 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="lineno">341 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span> |
| <span class="lineno">342 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">"parquet_hive"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">343 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="lineno">344 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span> |
| <span class="lineno">345 </span> <span class="p">}</span> <span class="k">else</span> <span class="p">{</span> |
| <span class="lineno">346 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o"><<</span> <span class="s">"Unknown format: "</span> <span class="o"><<</span> <span class="n">format_name</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">347 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o"><<</span> <span class="s">"Supported formats: feather, parquet, parquet_hive"</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">348 </span> <span class="k">return</span> <span class="n">EXIT_FAILURE</span><span class="p">;</span> |
| <span class="lineno">349 </span> <span class="p">}</span> |
| <span class="lineno">350 </span> |
| <span class="lineno">351 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span> <span class="n">table</span><span class="p">;</span> |
| <span class="lineno">352 </span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"no_filter"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">353 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ScanWholeDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">354 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"filter"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">355 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">356 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"project"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">357 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ProjectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">358 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"select_project"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">359 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">360 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"partitioned"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">361 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">362 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">"filter_partitioned"</span><span class="p">)</span> <span class="p">{</span> |
| <span class="lineno">363 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span> |
| <span class="lineno">364 </span> <span class="p">}</span> <span class="k">else</span> <span class="p">{</span> |
| <span class="lineno">365 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o"><<</span> <span class="s">"Unknown mode: "</span> <span class="o"><<</span> <span class="n">mode</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">366 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> |
| <span class="lineno">367 </span> <span class="o"><<</span> <span class="s">"Supported modes: no_filter, filter, project, select_project, partitioned, filter_partitioned"</span> |
| <span class="lineno">368 </span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">369 </span> <span class="k">return</span> <span class="n">EXIT_FAILURE</span><span class="p">;</span> |
| <span class="lineno">370 </span> <span class="p">}</span> |
| <span class="lineno">371 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="s">"Read "</span> <span class="o"><<</span> <span class="n">table</span><span class="o">-></span><span class="n">num_rows</span><span class="p">()</span> <span class="o"><<</span> <span class="s">" rows"</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">372 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o"><<</span> <span class="n">table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span> <span class="o"><<</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="lineno">373 </span> <span class="k">return</span> <span class="n">EXIT_SUCCESS</span><span class="p">;</span> |
| <span class="lineno">374 </span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| |
| <!-- Previous / next buttons --> |
| <div class="prev-next-area"> |
| <a class="left-prev" id="prev-link" href="json.html" title="previous page"> |
| <i class="fas fa-angle-left" aria-hidden="true"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Reading JSON files</p> |
| </div> |
| </a> |
| <a class="right-next" id="next-link" href="flight.html" title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Arrow Flight RPC</p> |
| </div> |
| <i class="fas fa-angle-right" aria-hidden="true"></i> |
| </a> |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| <script src="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p class="copyright"> |
| © Copyright 2016-2022 Apache Software Foundation.<br> |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.4.0.<br> |
| </p> |
| </div> |
| |
| </div> |
| </footer> |
| <script src="/docs/_static/versionwarning.js"></script> </body> |
| </html> |