blob: 23b81c7dba5ffd444c484c970d4777daead83398 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Reading and Writing ORC files &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
// Copy the saved colour mode and theme onto <html> as data-mode / data-theme.
// Reading localStorage can throw (for example a SecurityError when the browser
// blocks storage for this origin), so fall back to the same defaults that are
// used when nothing has been stored: mode "" and theme "light".
(function () {
  var savedMode = "";
  var savedTheme = "light";
  try {
    savedMode = localStorage.getItem("mode") || "";
    savedTheme = localStorage.getItem("theme") || "light";
  } catch (err) {
    // Storage is unavailable; keep the defaults assigned above.
  }
  document.documentElement.dataset.mode = savedMode;
  document.documentElement.dataset.theme = savedTheme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>
// Record which page this is on the shared DOCUMENTATION_OPTIONS object.
DOCUMENTATION_OPTIONS['pagename'] = 'cpp/orc';
</script>
<script>
// Attach the theme-related settings to the same DOCUMENTATION_OPTIONS object.
(function (options) {
  options.theme_version = '0.15.2';
  options.theme_switcher_json_url = '/docs/_static/versions.json';
  options.theme_switcher_version_match = 'dev/';
  options.show_version_warning_banner = true;
})(DOCUMENTATION_OPTIONS);
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/orc.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Reading and writing Parquet files" href="parquet.html" />
<link rel="prev" title="Reading and writing the Arrow IPC format" href="ipc.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
// Matomo command queue, shared through window._paq.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
[['disableCookies'], ['trackPageView'], ['enableLinkTracking']].forEach(function (command) {
  _paq.push(command);
});
(function () {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);
  // Insert an async <script> for matomo.js ahead of the first script element in the document.
  var firstScript = document.getElementsByTagName('script')[0];
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<!-- Dark-theme logo ("only-dark"): written by script, so it is only added to the page when JavaScript runs. -->
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Header search button. It is emitted with document.write, so the button is
// only added to the page when this script actually runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
// Header version switcher: a dropdown button plus an empty listbox container.
// Both are emitted with document.write, so they only exist when scripts run.
// As the markup comments inside the string note, the button text and the list
// entries are expected to be filled in by JavaScript later.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Header light/dark toggle button, emitted with document.write so it is only
// present when scripts run. It carries one icon span per data-mode value
// (light, dark, auto).
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Search button inside the "navbar-persistent--mobile" container. It is
// emitted with document.write, so it is only added when this script runs.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
// Sidebar version switcher: a dropdown button plus an empty listbox container,
// using ids ending in "-3". Both are emitted with document.write, so they only
// exist when scripts run. As the markup comments inside the string note, the
// button text and the list entries are expected to be filled in by JavaScript later.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Sidebar light/dark toggle button, emitted with document.write so it is only
// present when scripts run. It carries one icon span per data-mode value
// (light, dark, auto).
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="getting_started.html">Getting Started</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/io_tutorial.html">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorials/datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="user_guide.html">User Guide</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 has-children"><a class="reference internal" href="streaming_execution.html">Acero: A C++ streaming execution engine</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="acero/overview.html">Acero Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/user_guide.html">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="acero/developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="io.html">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2"><a class="reference internal" href="csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="api/support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/filesystem.html">Filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="api/dataset.html">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><a href="user_guide.html" class="nav-link">User Guide</a></li>
<li class="breadcrumb-item active" aria-current="page">Reading and...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="reading-and-writing-orc-files">
<h1>Reading and Writing ORC files<a class="headerlink" href="#reading-and-writing-orc-files" title="Permalink to this heading">#</a></h1>
<p>The <a class="reference external" href="https://orc.apache.org/">Apache ORC</a> project provides a
standardized open-source columnar storage format for use in data analysis
systems. It was originally created for use in <a class="reference external" href="https://hadoop.apache.org/">Apache Hadoop</a>, with systems like <a class="reference external" href="https://drill.apache.org">Apache Drill</a>, <a class="reference external" href="https://hive.apache.org">Apache Hive</a>, <a class="reference external" href="https://impala.apache.org">Apache
Impala</a>, and <a class="reference external" href="https://spark.apache.org">Apache Spark</a> adopting it as a shared standard for
high-performance data I/O.</p>
<p>Apache Arrow is an ideal in-memory representation layer for data that is being read
or written with ORC files.</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="api/formats.html#cpp-api-orc"><span class="std std-ref">ORC reader/writer API reference</span></a>.</p>
</div>
<section id="supported-orc-features">
<h2>Supported ORC features<a class="headerlink" href="#supported-orc-features" title="Permalink to this heading">#</a></h2>
<p>The ORC format has many features, and we support a subset of them.</p>
<section id="data-types">
<h3>Data types<a class="headerlink" href="#data-types" title="Permalink to this heading">#</a></h3>
<p>Here is a list of ORC types and the Arrow types they are mapped to.</p>
<table class="table">
<thead>
<!-- scope="col" explicitly ties each header cell to its column for assistive technology. -->
<tr class="row-odd"><th class="head" scope="col"><p>Logical type</p></th>
<th class="head" scope="col"><p>Mapped Arrow type</p></th>
<th class="head" scope="col"><p>Notes</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>BOOLEAN</p></td>
<td><p>Boolean</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>BYTE</p></td>
<td><p>Int8</p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>SHORT</p></td>
<td><p>Int16</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>INT</p></td>
<td><p>Int32</p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>LONG</p></td>
<td><p>Int64</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>FLOAT</p></td>
<td><p>Float32</p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>DOUBLE</p></td>
<td><p>Float64</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>STRING</p></td>
<td><p>String/LargeString</p></td>
<td><p>(1)</p></td>
</tr>
<tr class="row-even"><td><p>BINARY</p></td>
<td><p>Binary/LargeBinary/FixedSizeBinary</p></td>
<td><p>(1)</p></td>
</tr>
<tr class="row-odd"><td><p>TIMESTAMP</p></td>
<td><p>Timestamp/Date64</p></td>
<td><p>(1) (2)</p></td>
</tr>
<tr class="row-even"><td><p>TIMESTAMP_INSTANT</p></td>
<td><p>Timestamp</p></td>
<td><p>(2)</p></td>
</tr>
<tr class="row-odd"><td><p>LIST</p></td>
<td><p>List/LargeList/FixedSizeList</p></td>
<td><p>(1)</p></td>
</tr>
<tr class="row-even"><td><p>MAP</p></td>
<td><p>Map</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>STRUCT</p></td>
<td><p>Struct</p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>UNION</p></td>
<td><p>SparseUnion/DenseUnion</p></td>
<td><p>(1)</p></td>
</tr>
<tr class="row-odd"><td><p>DECIMAL</p></td>
<td><p>Decimal128/Decimal256</p></td>
<td><p>(1)</p></td>
</tr>
<tr class="row-even"><td><p>DATE</p></td>
<td><p>Date32</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>VARCHAR</p></td>
<td><p>String</p></td>
<td><p>(3)</p></td>
</tr>
<tr class="row-even"><td><p>CHAR</p></td>
<td><p>String</p></td>
<td><p>(3)</p></td>
</tr>
</tbody>
</table>
<ul class="simple">
<li><p>(1) On the read side the ORC type is read as the first corresponding Arrow type in the table.</p></li>
<li><p>(2) On the write side, ORC TIMESTAMP_INSTANT is used when a timezone is provided; otherwise
ORC TIMESTAMP is used. On the read side, both ORC TIMESTAMP and TIMESTAMP_INSTANT types are read
as the Arrow Timestamp type with <a class="reference internal" href="api/datatype.html#_CPPv4N5arrow8TimeUnit4type4NANOE" title="arrow::TimeUnit::NANO"><code class="xref cpp cpp-enumerator docutils literal notranslate"><span class="pre">arrow::TimeUnit::NANO</span></code></a>, and the timezone is set to
UTC for the ORC TIMESTAMP_INSTANT type only.</p></li>
<li><p>(3) On the read side both ORC CHAR and VARCHAR types are read as the Arrow String type. ORC CHAR
and VARCHAR types are not supported on the write side.</p></li>
</ul>
</section>
<section id="compression">
<h3>Compression<a class="headerlink" href="#compression" title="Permalink to this heading">#</a></h3>
<table class="table">
<thead>
<tr class="row-odd"><th class="head"><p>Compression codec</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>SNAPPY</p></td>
</tr>
<tr class="row-odd"><td><p>GZIP/ZLIB</p></td>
</tr>
<tr class="row-even"><td><p>LZ4</p></td>
</tr>
<tr class="row-odd"><td><p>ZSTD</p></td>
</tr>
</tbody>
</table>
<p><em>Unsupported compression codec:</em> LZO.</p>
</section>
</section>
<section id="reading-orc-files">
<h2>Reading ORC Files<a class="headerlink" href="#reading-orc-files" title="Permalink to this heading">#</a></h2>
<p>The <a class="reference internal" href="api/formats.html#_CPPv4N5arrow8adapters3orc13ORCFileReaderE" title="arrow::adapters::orc::ORCFileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ORCFileReader</span></code></a> class reads data for an entire
file or stripe into an <a class="reference internal" href="api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">::arrow::Table</span></code></a>.</p>
<section id="orcfilereader">
<h3>ORCFileReader<a class="headerlink" href="#orcfilereader" title="Permalink to this heading">#</a></h3>
<p>The <a class="reference internal" href="api/formats.html#_CPPv4N5arrow8adapters3orc13ORCFileReaderE" title="arrow::adapters::orc::ORCFileReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ORCFileReader</span></code></a> class requires a
<a class="reference internal" href="api/io.html#_CPPv4N5arrow2io16RandomAccessFileE" title="arrow::io::RandomAccessFile"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">::arrow::io::RandomAccessFile</span></code></a> instance representing the input
file.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/adapters/orc/adapter.h&gt;</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">st</span><span class="p">;</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">MemoryPool</span><span class="o">*</span><span class="w"> </span><span class="n">pool</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">();</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">RandomAccessFile</span><span class="o">&gt;</span><span class="w"> </span><span class="n">input</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
<span class="w"> </span><span class="c1">// Open ORC file reader</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">adapters</span><span class="o">::</span><span class="n">orc</span><span class="o">::</span><span class="n">ORCFileReader</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">pool</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_reader</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle error instantiating file reader...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">adapters</span><span class="o">::</span><span class="n">orc</span><span class="o">::</span><span class="n">ORCFileReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">maybe_reader</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// Read entire file as a single Arrow table</span>
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="o">-&gt;</span><span class="n">Read</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_table</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle error reading ORC data...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">maybe_table</span><span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
</section>
<section id="writing-orc-files">
<h2>Writing ORC Files<a class="headerlink" href="#writing-orc-files" title="Permalink to this heading">#</a></h2>
<section id="orcfilewriter">
<h3>ORCFileWriter<a class="headerlink" href="#orcfilewriter" title="Permalink to this heading">#</a></h3>
<p>An ORC file is written to a <a class="reference internal" href="api/io.html#_CPPv4N5arrow2io12OutputStreamE" title="arrow::io::OutputStream"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OutputStream</span></code></a>.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/adapters/orc/adapter.h&gt;</span>
<span class="p">{</span>
<span class="w"> </span><span class="c1">// One-shot write</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">io</span><span class="o">::</span><span class="n">OutputStream</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">...;</span>
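<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">writer_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">adapters</span><span class="o">::</span><span class="n">orc</span><span class="o">::</span><span class="n">WriteOptions</span><span class="p">();</span>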
<span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">maybe_writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">adapters</span><span class="o">::</span><span class="n">orc</span><span class="o">::</span><span class="n">ORCFileWriter</span><span class="o">::</span><span class="n">Open</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="n">writer_options</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">maybe_writer</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle error instantiating file writer...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">unique_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">adapters</span><span class="o">::</span><span class="n">orc</span><span class="o">::</span><span class="n">ORCFileWriter</span><span class="o">&gt;</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">maybe_writer</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Write</span><span class="p">(</span><span class="o">*</span><span class="n">input_table</span><span class="p">)).</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle write error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">()).</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// Handle close error...</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="ipc.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Reading and writing the Arrow IPC format</p>
</div>
</a>
<a class="right-next"
href="parquet.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Reading and writing Parquet files</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#supported-orc-features">Supported ORC features</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-types">Data types</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#compression">Compression</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-orc-files">Reading ORC Files</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#orcfilereader">ORCFileReader</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#writing-orc-files">Writing ORC Files</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#orcfilewriter">ORCFileWriter</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/orc.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>