blob: f6c5092c38fc9d685f1fb124004be92b79e16926 [file]
<!DOCTYPE html>
<html lang="en" data-content_root="../" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Security Considerations &#8212; Apache Arrow v25.0.0.dev7</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
this gives us a CSS class that will be invisible only if JS is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=55c26ed414f5f8a31ebb" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=55c26ed414f5f8a31ebb" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css?v=8dcd28dc" />
<!-- So that users can add custom icons -->
<script defer src="../_static/scripts/fontawesome.js?digest=55c26ed414f5f8a31ebb"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=55c26ed414f5f8a31ebb" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=55c26ed414f5f8a31ebb" />
<script src="../_static/documentation_options.js?v=6811d49d"></script>
<script src="../_static/doctools.js?v=fd6eb6e6"></script>
<script src="../_static/sphinx_highlight.js?v=6ffebe34"></script>
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
<script src="../_static/copybutton.js?v=3bb21c8c"></script>
<script src="../_static/design-tabs.js?v=f930bc37"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'format/Security';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.17.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
true;
</script>
<script>DOCUMENTATION_OPTIONS.search_as_you_type = false;</script>
<link rel="canonical" href="https://arrow.apache.org/docs/format/Security.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Integration Testing" href="Integration.html" />
<link rel="prev" title="ADBC: Arrow Database Connectivity" href="ADBC.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="25.0.0.dev7" />
<script src="../_static/searchtools.js"></script>
<script src="../_static/language_data.js"></script>
<script src="../searchindex.js"></script>
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header id="pst-header" class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class=" navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v25.0.0.dev7 - Home"/>
<img src="../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v25.0.0.dev7 - Home"/>
</a></div>
</div>
<div class=" navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Specifications
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Specifications
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item pst-sidebar-collapse"><button id="pst-collapse-sidebar-button" aria-expanded="true" aria-controls="pst-primary-sidebar"><svg class="pst-icon svg-inline--fa" role="img" aria-hidden="true" focusable="false" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg">
<path fill="currentColor" d="M3 15.5C2.36232 15.5 1.74874 15.2564 1.28478 14.8189C0.820828 14.3815 0.541576 13.7832 0.504167 13.1467L0.5 13L0.5 3C0.499965 2.36232 0.743605 1.74874 1.18107 1.28478C1.61854 0.820828 2.21676 0.541576 2.85333 0.504167L3 0.5L13 0.5C13.6377 0.499965 14.2513 0.743605 14.7152 1.18107C15.1792 1.61854 15.4584 2.21676 15.4958 2.85333L15.5 3L15.5 13C15.5 13.6377 15.2564 14.2513 14.8189 14.7152C14.3815 15.1792 13.7832 15.4584 13.1467 15.4958L13 15.5L3 15.5ZM3 13.8333L10.5 13.8333L10.5 2.16667L3 2.16667C2.79589 2.16669 2.59889 2.24163 2.44636 2.37726C2.29383 2.5129 2.19638 2.69979 2.1725 2.9025L2.16667 3L2.16667 13C2.16669 13.2041 2.24163 13.4011 2.37726 13.5536C2.5129 13.7062 2.69979 13.8036 2.9025 13.8275L3 13.8333ZM6.65583 10.325L6.5775 10.2558L4.91083 8.58917C4.76735 8.44567 4.68116 8.25476 4.66843 8.05223C4.65569 7.84971 4.71729 7.6495 4.84167 7.48917L4.91083 7.41083L6.5775 5.74417C6.72747 5.59471 6.9287 5.50794 7.14032 5.50148C7.35194 5.49502 7.55809 5.56935 7.7169 5.70937C7.8757 5.8494 7.97525 6.04463 7.99533 6.25539C8.01541 6.46616 7.95451 6.67667 7.825 6.84417L7.75583 6.9225L6.67917 8L7.75583 9.0775C7.89931 9.22099 7.98551 9.41191 7.99824 9.61443C8.01097 9.81695 7.94938 10.0172 7.825 10.1775L7.75583 10.2558C7.61234 10.3993 7.42142 10.4855 7.2189 10.4982C7.01638 10.511 6.81617 10.4494 6.65583 10.325Z"/>
</svg>
<span class="pst-collapse-sidebar-label">Collapse Sidebar</span>
<span class="pst-expand-sidebar-label">Expand Sidebar</span>
</button></div>
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="Intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="CanonicalExtensions.html">Canonical Extension Types</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="CanonicalExtensions/Examples.html">Canonical Extension Examples</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="StatisticsSchema.html">Statistics schema</a></li>
<li class="toctree-l1"><a class="reference internal" href="DissociatedIPC.html">Dissociated IPC Protocol</a></li>
<li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li>
<li class="toctree-l1"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Security Considerations</a></li>
<li class="toctree-l1"><a class="reference internal" href="Integration.html">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
<div class="sidebar-primary-item">
<div id="ethical-ad-placement"
class="flat"
data-ea-publisher="readthedocs"
data-ea-type="readthedocs-sidebar"
data-ea-manual="true">
</div></div>
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Security Considerations</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="security-considerations">
<span id="format-security"></span><h1>Security Considerations<a class="headerlink" href="#security-considerations" title="Link to this heading">#</a></h1>
<p>This document describes security considerations when reading Arrow
data from untrusted sources. It focuses specifically on data passed in a
standardized serialized form (such as an IPC stream), as opposed to an
implementation-specific native representation (such as <code class="docutils literal notranslate"><span class="pre">arrow::Array</span></code> in C++).</p>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>Implementation-specific concerns, such as bad API usage, are out of scope
for this document. Please refer to the implementation’s own documentation.</p>
</div>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<dl class="simple">
<dt>Arrow C++ <a class="reference internal" href="../cpp/security.html#cpp-security"><span class="std std-ref">Security Considerations</span></a></dt><dd><p>Security model for Arrow C++ APIs</p>
</dd>
</dl>
</div>
<section id="who-should-read-this">
<h2>Who should read this<a class="headerlink" href="#who-should-read-this" title="Link to this heading">#</a></h2>
<p>You should read this document if you belong to either of these two categories:</p>
<ol class="arabic simple">
<li><p><em>users</em> of Arrow: that is, developers of third-party libraries or applications
that don’t directly implement the Arrow formats or protocols, but instead
call language-specific APIs provided by an Arrow library (as defined below);</p></li>
<li><p><em>implementors</em> of Arrow libraries: that is, libraries that provide APIs
abstracting away from the details of the Arrow formats and protocols; such
libraries include, but are not limited to, the official Arrow implementations
documented on <a class="reference external" href="https://arrow.apache.org">https://arrow.apache.org</a>.</p></li>
</ol>
</section>
<section id="columnar-format">
<h2>Columnar Format<a class="headerlink" href="#columnar-format" title="Link to this heading">#</a></h2>
<section id="invalid-data">
<span id="format-invalid-data"></span><h3>Invalid data<a class="headerlink" href="#invalid-data" title="Link to this heading">#</a></h3>
<p>The Arrow <a class="reference internal" href="Columnar.html#format-columnar"><span class="std std-ref">columnar format</span></a> is an efficient binary
representation with a focus on performance and efficiency. While the format
does not store raw pointers, the contents of Arrow buffers are often
combined and converted to pointers into the process’ address space.
Invalid Arrow data may therefore cause invalid memory accesses
(potentially crashing the process) or access to non-Arrow data
(potentially allowing an attacker to exfiltrate confidential information).</p>
<p>For instance, to read a value from a Binary array, one needs to 1) read the
values’ offsets from the array’s offsets buffer, and 2) read the range of bytes
delimited by these offsets in the array’s data buffer. If the offsets are
invalid (deliberately or not), then step 2) can access memory outside of the
data buffer’s range.</p>
<p>Another instance of invalid data lies in the values themselves. For example,
a String array is only allowed to contain valid UTF-8 data, but an untrusted
source might have emitted invalid UTF-8 under the guise of a String array.
An unsuspecting algorithm that is only specified for valid UTF-8 inputs might
lead to dangerous behavior (for example by reading memory out of bounds when
looking for a UTF-8 character boundary).</p>
<p>Fortunately, given its schema, Arrow data can be validated up front,
so that reading this data will not pose any danger later on.</p>
<section id="advice-for-users">
<h4>Advice for users<a class="headerlink" href="#advice-for-users" title="Link to this heading">#</a></h4>
<p>Arrow implementations often assume inputs follow the specification to provide
high speed processing. It is <strong>extremely recommended</strong> that your application
explicitly validates any Arrow data it receives in serialized form
from untrusted sources. Many Arrow implementations provide explicit APIs to
perform such validation.</p>
</section>
<section id="advice-for-implementors">
<h4>Advice for implementors<a class="headerlink" href="#advice-for-implementors" title="Link to this heading">#</a></h4>
<p>It is <strong>recommended</strong> that you provide dedicated APIs to validate Arrow arrays
and/or record batches. Users will be able to utilize those APIs to assert whether
data coming from untrusted sources can be safely accessed.</p>
<p>A typical validation API must return a well-defined error, not crash, if the
given Arrow data is invalid; it must always be safe to execute regardless of
whether the data is valid or not.</p>
</section>
</section>
<section id="uninitialized-data">
<h3>Uninitialized data<a class="headerlink" href="#uninitialized-data" title="Link to this heading">#</a></h3>
<p>A less obvious pitfall is when some parts of an Arrow array are left uninitialized.
For example, if an element of a primitive Arrow array is marked null through its
validity bitmap, the corresponding value slot in the values buffer can be ignored
for all purposes. It is therefore tempting, when creating an array with null
values, to not initialize the corresponding value slots.</p>
<p>However, this then introduces a serious security risk if the Arrow data is
serialized and published (e.g. using IPC or Flight) such that it can be
accessed by untrusted users. Indeed, the uninitialized value slot can
reveal data left by a previous memory allocation made in the same process.
Depending on the application, this data could contain confidential information.</p>
<section id="advice-for-users-and-implementors">
<h4>Advice for users and implementors<a class="headerlink" href="#advice-for-users-and-implementors" title="Link to this heading">#</a></h4>
<p>When creating an Arrow array, it is <strong>recommended</strong> that you never leave any
data uninitialized in a buffer if the array might be sent to, or read by, an
untrusted third party, even when the uninitialized data is logically
irrelevant. The easiest way to do this is to zero-initialize any buffer that
will not be populated in full.</p>
<p>If it is determined, through benchmarking, that zero-initialization imposes
an excessive performance cost, a library or application may instead decide
to use uninitialized memory internally as an optimization; but it should then
ensure all such uninitialized values are cleared before passing the Arrow data
to another system.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Sending Arrow data out of the current process can happen <em>indirectly</em>,
for example if you produce it over the C Data Interface and the consumer
persists it using the IPC format on some public storage.</p>
</div>
</section>
</section>
</section>
<section id="c-data-interface">
<h2>C Data Interface<a class="headerlink" href="#c-data-interface" title="Link to this heading">#</a></h2>
<p>The C Data Interface contains raw pointers into the process’ address space.
It is generally not possible to validate that those pointers are legitimate;
reading from such a pointer may crash or access unrelated or bogus data.</p>
<section id="id1">
<h3>Advice for users<a class="headerlink" href="#id1" title="Link to this heading">#</a></h3>
<p>You should <strong>never</strong> consume a C Data Interface structure from an untrusted
producer, as it is by construction impossible to guard against dangerous
behavior in this case.</p>
</section>
<section id="id2">
<h3>Advice for implementors<a class="headerlink" href="#id2" title="Link to this heading">#</a></h3>
<p>When consuming a C Data Interface structure, you can assume that it comes from
a trusted producer, for the reason explained above. However, it is still
<strong>recommended</strong> that you validate it for soundness (for example that the right
number of buffers is passed for a given datatype), as a trusted producer can
have bugs anyway.</p>
</section>
</section>
<section id="ipc-format">
<h2>IPC Format<a class="headerlink" href="#ipc-format" title="Link to this heading">#</a></h2>
<p>The <a class="reference internal" href="Columnar.html#ipc-message-format"><span class="std std-ref">IPC format</span></a> is a serialization format for the
columnar format with associated metadata. Reading an IPC stream or file from
an untrusted source comes with similar caveats as reading the Arrow columnar
format.</p>
<p>The additional signaling and metadata in the IPC format come with
their own risks. For example, buffer offsets and sizes encoded in IPC messages
may be out of bounds for the IPC stream; Flatbuffers-encoded metadata payloads
may carry incorrect offsets pointing outside of the designated metadata area.</p>
<section id="id3">
<h3>Advice for users<a class="headerlink" href="#id3" title="Link to this heading">#</a></h3>
<p>Arrow libraries will typically ensure IPC streams are structurally valid
but may not also validate the underlying Array data. It is <strong>extremely recommended</strong>
that you use the appropriate APIs to validate the Arrow data read from an untrusted IPC stream.</p>
</section>
<section id="id4">
<h3>Advice for implementors<a class="headerlink" href="#id4" title="Link to this heading">#</a></h3>
<p>It is <strong>extremely recommended</strong> to run dedicated validation checks when decoding
the IPC format, to make sure that the decoding cannot induce unwanted behavior.
Failing those checks should return a well-defined error to the caller, not crash.</p>
</section>
</section>
<section id="extension-types">
<h2>Extension Types<a class="headerlink" href="#extension-types" title="Link to this heading">#</a></h2>
<p>Extension types typically register a custom deserialization hook so that they
can be automatically recreated when reading from an external source (for example
using IPC). The deserialization hook has to decode the extension type’s parameters
from a string or binary payload specific to the extension type.
<a class="reference internal" href="CanonicalExtensions.html#opaque-extension"><span class="std std-ref">Typical examples</span></a> use a bespoke JSON representation
with object fields representing the various parameters.</p>
<p>When reading data from an untrusted source, any registered deserialization hook
could be called with an arbitrary payload. It is therefore of primary importance
that the hook be safe to call on invalid, potentially malicious, data. This mandates
the use of a robust metadata serialization schema (such as JSON, but not Python’s
<a class="reference external" href="https://docs.python.org/3/library/pickle.html">pickle</a> or R’s
<a class="reference external" href="https://stat.ethz.ch/R-manual/R-devel/library/base/html/serialize.html">serialize()</a>,
for example).</p>
<section id="id5">
<h3>Advice for users and implementors<a class="headerlink" href="#id5" title="Link to this heading">#</a></h3>
<p>When designing an extension type, it is <strong>extremely recommended</strong> to choose a
metadata serialization format that is robust against potentially malicious
data.</p>
<p>When implementing an extension type, it is <strong>recommended</strong> to ensure that the
deserialization hook is able to detect, and error out gracefully, if the
serialized metadata payload is invalid.</p>
</section>
</section>
<section id="testing-for-robustness">
<h2>Testing for robustness<a class="headerlink" href="#testing-for-robustness" title="Link to this heading">#</a></h2>
<section id="id6">
<h3>Advice for implementors<a class="headerlink" href="#id6" title="Link to this heading">#</a></h3>
<p>For APIs that may process untrusted inputs, it is <strong>extremely recommended</strong>
that your unit tests exercise your APIs against typical kinds of invalid data.
For example, your validation APIs will have to be tested against invalid Binary
or List offsets, invalid UTF-8 data in a String array, etc.</p>
<section id="testing-against-known-regression-files">
<h4>Testing against known regression files<a class="headerlink" href="#testing-against-known-regression-files" title="Link to this heading">#</a></h4>
<p>The <a class="reference external" href="https://github.com/apache/arrow-testing/">arrow-testing</a> repository
contains regression files for various formats, such as the IPC format.</p>
<p>Two categories of files are especially noteworthy and can serve to exercise
an Arrow implementation’s robustness:</p>
<ol class="arabic simple">
<li><p><a class="reference internal" href="Integration.html#format-gold-integration-files"><span class="std std-ref">gold integration files</span></a> that are valid
files to exercise compliance with Arrow IPC features;</p></li>
<li><p><a class="reference internal" href="../developers/cpp/fuzzing.html#fuzz-regression-files"><span class="std std-ref">fuzz regression files</span></a> that have been automatically
generated each time a fuzzer finds a bug triggered by a specific (usually invalid)
input for a given format.</p></li>
</ol>
</section>
<section id="fuzzing">
<h4>Fuzzing<a class="headerlink" href="#fuzzing" title="Link to this heading">#</a></h4>
<p>It is <strong>recommended</strong> that you go one step further and set up some kind of
automated robustness testing against unforeseen inputs. One typical approach
is through fuzzing, possibly coupled with a runtime instrumentation framework
that detects dangerous behavior (such as Address Sanitizer in C++ or
Rust).</p>
<p>A reasonable way of setting up fuzzing for Arrow is using the IPC format as
a binary payload; the fuzz target should not only attempt to decode the IPC
stream as Arrow data, but it should then validate the Arrow data.
This will strengthen both the IPC decoder and the validation routines
against invalid, potentially malicious data. Finally, if validation comes out
successfully, the fuzz target may exercise some important core functionality,
such as printing the data for human display; this will help ensure that the
validation routine did not let through invalid data that may lead to dangerous
behavior.</p>
</section>
</section>
</section>
<section id="non-arrow-formats-and-protocols">
<h2>Non-Arrow formats and protocols<a class="headerlink" href="#non-arrow-formats-and-protocols" title="Link to this heading">#</a></h2>
<p>Arrow data can also be sent or stored using third-party formats such as Apache
Parquet. Those formats may or may not present the same security risks as listed
above (for example, the precautions around uninitialized data may not apply
in a format like Parquet that does not create any value slots for null elements).
We suggest you refer to these projects’ own documentation for more concrete
guidelines.</p>
</section>
</section>
</article>
<footer class="prev-next-footer d-print-none">
<div class="prev-next-area">
<a class="left-prev"
href="ADBC.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">ADBC: Arrow Database Connectivity</p>
</div>
</a>
<a class="right-next"
href="Integration.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Integration Testing</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav id="pst-page-toc-nav" class="page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#who-should-read-this">Who should read this</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#columnar-format">Columnar Format</a><ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#invalid-data">Invalid data</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#advice-for-users">Advice for users</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#advice-for-implementors">Advice for implementors</a></li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#uninitialized-data">Uninitialized data</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#advice-for-users-and-implementors">Advice for users and implementors</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#c-data-interface">C Data Interface</a><ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Advice for users</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Advice for implementors</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ipc-format">IPC Format</a><ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">Advice for users</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id4">Advice for implementors</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#extension-types">Extension Types</a><ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id5">Advice for users and implementors</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#testing-for-robustness">Testing for robustness</a><ul class="pst-show_toc_level nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id6">Advice for implementors</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#testing-against-known-regression-files">Testing against known regression files</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#fuzzing">Fuzzing</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#non-arrow-formats-and-protocols">Non-Arrow formats and protocols</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/format/Security.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../_static/scripts/bootstrap.js?digest=55c26ed414f5f8a31ebb"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=55c26ed414f5f8a31ebb"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2026 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 9.1.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
<!-- # L10n: Setting the PST URL as an argument as this does not need to be localized -->
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.17.1.
</p></div>
</div>
</div>
</footer>
</body>
</html>