| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="../" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>Dissociated IPC Protocol — Apache Arrow v22.0.0.dev115</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; |
| </script> |
| <!-- |
| This gives us a CSS class that will be invisible only if JS is disabled |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" /> |
| <link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" /> |
| <link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css?v=8dcd28dc" /> |
| |
| <!-- So that users can add custom icons --> |
| <script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" /> |
| |
| <script src="../_static/documentation_options.js?v=ae2a2051"></script> |
| <script src="../_static/doctools.js?v=9bcbadda"></script> |
| <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <script src="../_static/clipboard.min.js?v=a7894cd8"></script> |
| <script src="../_static/copybutton.js?v=3bb21c8c"></script> |
| <script src="../_static/design-tabs.js?v=f930bc37"></script> |
| <script type="module" src="https://cdn.jsdelivr.net/npm/mermaid@11.2.0/dist/mermaid.esm.min.mjs"></script> |
| <script type="module" src="https://cdn.jsdelivr.net/npm/@mermaid-js/layout-elk@0.1.4/dist/mermaid-layout-elk.esm.min.mjs"></script> |
| <script type="module">import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11.2.0/dist/mermaid.esm.min.mjs";import elkLayouts from "https://cdn.jsdelivr.net/npm/@mermaid-js/layout-elk@0.1.4/dist/mermaid-layout-elk.esm.min.mjs";mermaid.registerLayoutLoaders(elkLayouts);mermaid.initialize({startOnLoad:false});</script> |
| <script type="module"> |
| import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11.2.0/dist/mermaid.esm.min.mjs"; |
| window.addEventListener("load", () => mermaid.run()); |
| </script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'format/DissociatedIPC';</script> |
| <script> |
| DOCUMENTATION_OPTIONS.theme_version = '0.16.1'; |
| DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json'; |
| DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/'; |
| DOCUMENTATION_OPTIONS.show_version_warning_banner = |
| true; |
| </script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/format/DissociatedIPC.html" /> |
| <link rel="icon" href="../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Arrow Flight RPC" href="Flight.html" /> |
| <link rel="prev" title="Statistics schema" href="StatisticsSchema.html" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="22.0.0.dev115" /> |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class=" navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v22.0.0.dev115 - Home"/> |
| <img src="../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v22.0.0.dev115 - Home"/> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class=" navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-2" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-2" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-2" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-2"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-3" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-3" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-3" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-3"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="Intro.html">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CanonicalExtensions.html">Canonical Extension Types</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="StatisticsSchema.html">Statistics schema</a></li> |
| <li class="toctree-l1 current active"><a class="current reference internal" href="#">Dissociated IPC Protocol</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Integration.html">Integration Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Dissociated IPC Protocol</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <section id="dissociated-ipc-protocol"> |
| <span id="dissociated-ipc"></span><h1>Dissociated IPC Protocol<a class="headerlink" href="#dissociated-ipc-protocol" title="Link to this heading">#</a></h1> |
| <div class="admonition warning"> |
| <p class="admonition-title">Warning</p> |
| <p>Experimental: The Dissociated IPC Protocol is experimental in its current |
| form. Based on feedback and usage the protocol definition may change until |
| it is fully standardized.</p> |
| </div> |
| <section id="rationale"> |
| <h2>Rationale<a class="headerlink" href="#rationale" title="Link to this heading">#</a></h2> |
| <p>The <a class="reference internal" href="Columnar.html#format-ipc"><span class="std std-ref">Arrow IPC format</span></a> describes a protocol for transferring |
| Arrow data as a stream of record batches. This protocol expects a continuous |
| stream of bytes divided into discrete messages (using a length prefix and |
| continuation indicator). Each discrete message consists of two portions:</p> |
| <ul class="simple"> |
| <li><p>A <a class="reference external" href="http://github.com/google/flatbuffers">Flatbuffers</a> header message</p></li> |
| <li><p>A series of bytes consisting of the flattened and packed body buffers (some |
| message types, like Schema messages, do not have this section). |
| This is referred to as the <em>message body</em> in the IPC format spec.</p></li> |
| </ul> |
| <p>For most cases, the existing IPC format is sufficiently efficient:</p> |
| <ul class="simple"> |
| <li><p>Receiving data in the IPC format allows zero-copy utilization of the body |
| buffer bytes; no deserialization is required to form Arrow Arrays</p></li> |
| <li><p>An IPC file format can be memory-mapped because it is location agnostic |
| and the bytes of the file are exactly what is expected in memory.</p></li> |
| </ul> |
| <p>However, there are use cases that aren’t handled by this:</p> |
| <ul class="simple"> |
| <li><p>Constructing the IPC record batch message requires allocating a contiguous |
| chunk of bytes and copying all of the data buffers into it, packed together |
| back-to-back. This pessimizes the common case of wrapping existing, directly |
| consumable data into an IPC message.</p></li> |
| <li><p>Even if Arrow data is located in memory that is accessible across process boundaries |
| or transports (such as UCX), there is no standard way to specify that shared |
| location to consumers that could take advantage of it.</p></li> |
| <li><p>Arrow data located on a non-CPU device (such as a GPU) cannot be sent using |
| Arrow IPC without either copying the data back to the host device or copying |
| the Flatbuffers metadata bytes into device memory.</p> |
| <ul> |
| <li><p>By the same token, receiving IPC messages into device memory would require |
| performing a copy of the Flatbuffers metadata back to the host CPU device. This |
| is due to the fact that the IPC stream interleaves data and metadata across a |
| single stream.</p></li> |
| </ul> |
| </li> |
| </ul> |
| <p>This protocol attempts to solve these use cases in an efficient manner.</p> |
| <section id="goals"> |
| <h3>Goals<a class="headerlink" href="#goals" title="Link to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p>Define a generic protocol for passing Arrow IPC data, not tied to any particular |
| transport, that also allows for utilizing non-CPU device memory, shared memory, and |
| newer “high performance” transports such as <a class="reference external" href="https://openucx.org/">UCX</a> or <a class="reference external" href="https://ofiwg.github.io/libfabric/">libfabric</a>.</p> |
| <ul> |
| <li><p>This allows for the data in the body to be kept on non-CPU devices (like GPUs) |
| without expensive device-to-host copies.</p></li> |
| </ul> |
| </li> |
| <li><p>Allow for using <a class="reference internal" href="Flight.html#flight-rpc"><span class="std std-ref">Flight RPC</span></a> purely for control flow by separating |
| the stream of IPC metadata from IPC body bytes</p></li> |
| </ul> |
| </section> |
| <section id="definitions"> |
| <h3>Definitions<a class="headerlink" href="#definitions" title="Link to this heading">#</a></h3> |
| <dl class="glossary"> |
| <dt id="term-IPC-Metadata">IPC Metadata<a class="headerlink" href="#term-IPC-Metadata" title="Link to this term">#</a></dt><dd><p>The Flatbuffers message bytes that encompass the header of an Arrow IPC message</p> |
| </dd> |
| <dt id="term-Tag">Tag<a class="headerlink" href="#term-Tag" title="Link to this term">#</a></dt><dd><p>A little-endian <code class="docutils literal notranslate"><span class="pre">uint64</span></code> value used for flow control and used in determining |
| how to interpret the body of a message. Specific bits can be masked to allow |
| identifying messages by only a portion of the tag, leaving the rest of the bits |
| to be used for control flow or other message metadata. Some transports, such as |
| UCX, have built-in support for such tag values and will provide them in CPU |
| memory regardless of whether or not the body of the message may reside on a |
| non-CPU device.</p> |
| </dd> |
| <dt id="term-Sequence-Number">Sequence Number<a class="headerlink" href="#term-Sequence-Number" title="Link to this term">#</a></dt><dd><p>A little-endian, 4-byte unsigned integer starting at 0 for a stream, indicating |
| the sequence order of messages. It is also used to identify specific messages to |
| tie the IPC metadata header to its corresponding body since the metadata and body |
| can be sent across separate pipes/streams/transports.</p> |
| <p>If a sequence number reaches <code class="docutils literal notranslate"><span class="pre">UINT32_MAX</span></code>, it should be allowed to roll over, as |
| it is unlikely that enough unprocessed messages would be outstanding |
| to cause an overlap of sequence numbers.</p> |
| <p>The sequence number serves two purposes: To identify corresponding metadata and |
| tagged body data messages and to ensure we do not rely on messages having to arrive |
| in order. A client should use the sequence number to correctly order messages as |
| they arrive for processing.</p> |
| </dd> |
| </dl> |
| </section> |
| </section> |
| <section id="the-protocol"> |
| <h2>The Protocol<a class="headerlink" href="#the-protocol" title="Link to this heading">#</a></h2> |
| <p>A reference example implementation utilizing <a class="reference external" href="https://docs.rapids.ai/api">libcudf</a> and <a class="reference external" href="https://openucx.org/">UCX</a> can be found in the |
| <a class="reference external" href="https://github.com/apache/arrow-experiments/tree/main/dissociated-ipc">arrow-experiments repo</a>.</p> |
| <section id="requirements"> |
| <h3>Requirements<a class="headerlink" href="#requirements" title="Link to this heading">#</a></h3> |
| <p>A transport implementing this protocol <strong>MUST</strong> provide two pieces of functionality:</p> |
| <ul class="simple"> |
| <li><p>Message sending</p> |
| <ul> |
| <li><p>Delimited messages (like gRPC) as opposed to non-delimited streams (like plain TCP |
| without further framing).</p></li> |
| <li><p>Alternatively, a framing mechanism like the <a class="reference internal" href="Columnar.html#ipc-message-format"><span class="std std-ref">encapsulated message format</span></a> |
| for the IPC protocol can be used while leaving out the body bytes.</p></li> |
| </ul> |
| </li> |
| <li><p>Tagged message sending</p> |
| <ul> |
| <li><p>Sending a message that has an attached little-endian, unsigned 64-bit integral tag |
| for control flow. A tag like this allows control flow to operate on a message whose body |
| is on a non-CPU device without requiring the message itself to get copied off of the device.</p></li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| <section id="uri-specification"> |
| <h3>URI Specification<a class="headerlink" href="#uri-specification" title="Link to this heading">#</a></h3> |
| <p>When providing a URI to a consumer to contact for use with this protocol (such as via |
| the <a class="reference internal" href="Flight.html#flight-location-uris"><span class="std std-ref">Location URI for Flight</span></a>), the URI should specify a scheme |
| like <em>ucx:</em> or <em>fabric:</em>, that is easily identifiable. In addition, the URI should |
| encode the following URI query parameters:</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>As this protocol matures, this document will get updated with commonly recognized |
| transport schemes that get used with it.</p> |
| </div> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">want_data</span></code> - <strong>REQUIRED</strong> - uint64 integer value</p> |
| <ul> |
| <li><p>This value should be used to tag an initial message to the server to initiate a |
| data transfer. The body of the initiating message should be an opaque binary identifier |
| of the data stream being requested (like the <code class="docutils literal notranslate"><span class="pre">Ticket</span></code> in the Flight RPC protocol)</p></li> |
| </ul> |
| </li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">free_data</span></code> - <strong>OPTIONAL</strong> - uint64 integer value</p> |
| <ul> |
| <li><p>If the server might send messages using offsets / addresses for remote memory accessing |
| or shared memory locations, the URI should include this parameter. This value is used to |
| tag messages sent from the client to the data server, containing specific offsets / addresses |
| which were provided that are no longer required by the client (i.e. any operations that |
| directly reference those memory locations, such as copying the remote data into local memory, |
| have been completed).</p></li> |
| </ul> |
| </li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">remote_handle</span></code> - <strong>OPTIONAL</strong> - base64-encoded string</p> |
| <ul> |
| <li><p>When working with shared memory or remote memory, this value indicates any required |
| handle or identifier that is necessary for accessing the memory.</p> |
| <ul> |
| <li><p>Using UCX, this would be an <em>rkey</em> value</p></li> |
| <li><p>With CUDA IPC, this would be the value of the base GPU pointer or memory handle, |
| and subsequent addresses would be offsets from this base pointer.</p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| <section id="handling-of-backpressure"> |
| <h3>Handling of Backpressure<a class="headerlink" href="#handling-of-backpressure" title="Link to this heading">#</a></h3> |
| <p><em>Currently</em>, this proposal does not specify any way to manage the backpressure of |
| messages to throttle for memory and bandwidth reasons. For now, this will be |
| <strong>transport-defined</strong> rather than locking the protocol into something sub-optimal.</p> |
| <p>As usage among different transports and libraries grows, common patterns will emerge |
| that will allow for a generic, but efficient, way to handle backpressure across |
| different use cases.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>While the protocol itself is transport agnostic, the current usage and examples |
| have only been tested using the UCX and libfabric transports so far.</p> |
| </div> |
| </section> |
| </section> |
| <section id="protocol-description"> |
| <h2>Protocol Description<a class="headerlink" href="#protocol-description" title="Link to this heading">#</a></h2> |
| <p>There are two possibilities that can occur:</p> |
| <ol class="arabic simple"> |
| <li><p>The streams of metadata and body data are sent across separate connections</p></li> |
| </ol> |
| <object data="../_images/mermaid-8abfde41766e4e63ac35f7f099c36aab16139fbc.svg" type="image/svg+xml"> |
| <p class="warning">%% Licensed to the Apache Software Foundation (ASF) under one |
| %% or more contributor license agreements. See the NOTICE file |
| %% distributed with this work for additional information |
| %% regarding copyright ownership. The ASF licenses this file |
| %% to you under the Apache License, Version 2.0 (the |
| %% "License"); you may not use this file except in compliance |
| %% with the License. You may obtain a copy of the License at |
| %% |
| %% http://www.apache.org/licenses/LICENSE-2.0 |
| %% |
| %% Unless required by applicable law or agreed to in writing, |
| %% software distributed under the License is distributed on an |
| %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| %% KIND, either express or implied. See the License for the |
| %% specific language governing permissions and limitations |
| %% under the License. |
| |
| sequenceDiagram |
| participant D as Data Stream |
| participant C as Client |
| participant M as Metadata Stream |
| |
| activate C |
| C-->>+M: TaggedMessage(server.want_data, bytes=ID_of_desired_data) |
| C-->>+D: TaggedMessage(server.want_data, bytes=ID_of_desired_data) |
| M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata) |
| loop each batch |
| par |
| M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata) |
| and |
| alt |
| D-->>C: TaggedMessage((bytes[0] << 56) | le_bytes(sequence_number),<br/>bytes=batch_data) |
| else |
| D-->>C: TaggedMessage((bytes[1] << 56) | le_bytes(sequence_number),<br/>bytes=uint64_pairs) |
| end |
| end |
| end |
| M-->>C: Message(bytes([0]) + le_bytes(sequence_number)) |
| deactivate M |
| loop |
| C-->>D: TaggedMessage(server.free_data, bytes=uint64_list) |
| end |
| deactivate D |
| deactivate C</p></object> |
| <ol class="arabic simple" start="2"> |
| <li><p>The streams of metadata and body data are sent simultaneously across the |
| same connection</p></li> |
| </ol> |
| <object data="../_images/mermaid-e962a4be5befa97b0166c4cae51c8201f88d2bb5.svg" type="image/svg+xml"> |
| <p class="warning">%% Licensed to the Apache Software Foundation (ASF) under one |
| %% or more contributor license agreements. See the NOTICE file |
| %% distributed with this work for additional information |
| %% regarding copyright ownership. The ASF licenses this file |
| %% to you under the Apache License, Version 2.0 (the |
| %% "License"); you may not use this file except in compliance |
| %% with the License. You may obtain a copy of the License at |
| %% |
| %% http://www.apache.org/licenses/LICENSE-2.0 |
| %% |
| %% Unless required by applicable law or agreed to in writing, |
| %% software distributed under the License is distributed on an |
| %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| %% KIND, either express or implied. See the License for the |
| %% specific language governing permissions and limitations |
| %% under the License. |
| |
| sequenceDiagram |
| participant C as Client |
| participant S as Server |
| activate C |
| C-->>+S: TaggedMessage(server.want_data, bytes=ID_of_desired_data) |
| S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata) |
| par |
| loop each chunk |
| S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata) |
| end |
| S-->>C: Message(bytes([0]) + le_bytes(sequence_number)) |
| and |
| loop each chunk |
| alt |
| S-->>C: TaggedMessage((bytes[0] << 56) | le_bytes(sequence_number),<br/>bytes=batch_data) |
| else |
| S-->>C: TaggedMessage((bytes[1] << 56) | le_bytes(sequence_number),<br/>bytes=uint64_pairs) |
| end |
| end |
| end |
| |
| loop |
| C-->>S: TaggedMessage(server.free_data, bytes=uint64_list) |
| end |
| deactivate S |
| deactivate C</p></object> |
| <section id="server-sequence"> |
| <h3>Server Sequence<a class="headerlink" href="#server-sequence" title="Link to this heading">#</a></h3> |
| <p>There can be either a single server handling both the IPC Metadata stream and the |
| Body data streams, or separate servers for handling the IPC Metadata and the body |
| data. This allows for streaming of data across either a single transport pipe or |
| two pipes if desired.</p> |
| <section id="metadata-stream-sequence"> |
| <h4>Metadata Stream Sequence<a class="headerlink" href="#metadata-stream-sequence" title="Link to this heading">#</a></h4> |
| <p>The standing state of the server is waiting for a <strong>tagged</strong> message with a specific |
| <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> tag value to initiate a transfer. This <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> value is defined |
| by the server and propagated to any clients via the URI they are provided. This protocol |
| does not prescribe any particular value so that it will not interfere with any other |
| existing protocols that rely on tag values. The body of that message will contain an |
| opaque, binary identifier to indicate a particular dataset / data stream to send.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>For instance, the <strong>ticket</strong> that was passed with a <em>FlightInfo</em> message would be |
| the body of this message. Because it is opaque, it can be anything the server wants |
| to use. The URI and identifier do not need to be given to the client via Flight RPC, |
| but could come across from any transport or protocol desired.</p> |
| </div> |
| <p>Upon receiving a <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> request, the server <em>should</em> respond by sending a stream |
| of messages consisting of the following:</p> |
| <object data="../_images/mermaid-73154fdcb1d4fb160464a776ec1b8cdf50794f49.svg" type="image/svg+xml"> |
| <p class="warning">block-beta |
| columns 8 |
| |
| block:P["\n\n\n\nPrefix"]:5 |
| T["Message type\nByte 0"] |
| S["Sequence number\nBytes 1-4"] |
| end |
| H["Flatbuffer bytes\nRest of the message"]:3</p></object> |
| <ul class="simple"> |
| <li><p>A 5-byte prefix</p> |
| <ul> |
| <li><p>The first byte of the message indicates the type of message. Currently there are only |
| two allowed message types (more types may be added in the future):</p> |
| <ol class="arabic simple" start="0"> |
| <li><p>End of Stream</p></li> |
| <li><p>Flatbuffers IPC Metadata Message</p></li> |
| </ol> |
| </li> |
| <li><p>The next 4 bytes are a little-endian, unsigned 32-bit integer indicating the sequence number of |
| the message. The first message in the stream (which <strong>MUST</strong> always be a schema message) <strong>MUST</strong> |
| have a sequence number of <code class="docutils literal notranslate"><span class="pre">0</span></code>. Each subsequent message <strong>MUST</strong> increment the number by |
| <code class="docutils literal notranslate"><span class="pre">1</span></code>.</p></li> |
| </ul> |
| </li> |
| <li><p>The full Flatbuffers bytes of an Arrow IPC header</p></li> |
| </ul> |
| <p>As defined in the Arrow IPC format, each metadata message can represent a chunk of data or |
| dictionaries for use by the stream of data.</p> |
| <p>After sending the last metadata message, the server <strong>MUST</strong> indicate the end of the stream |
| by sending a message consisting of <strong>exactly</strong> 5 bytes:</p> |
| <ul class="simple"> |
| <li><p>The first byte is <code class="docutils literal notranslate"><span class="pre">0</span></code>, indicating an <strong>End of Stream</strong> message</p></li> |
| <li><p>The last 4 bytes are the sequence number (4-byte, unsigned integer in little-endian byte order)</p></li> |
| </ul> |
| </section> |
| <section id="data-stream-sequence"> |
| <h4>Data Stream Sequence<a class="headerlink" href="#data-stream-sequence" title="Link to this heading">#</a></h4> |
| <p>If a single server is handling both the data and metadata streams, then the data messages |
| <strong>should</strong> begin being sent to the client in parallel with the metadata messages. Otherwise, |
| as with the metadata sequence, the standing state of the server is to wait for a <strong>tagged</strong> |
| message with the <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> tag value, whose body indicates the dataset / data stream |
| to send to the client.</p> |
| <p>For each IPC message in the stream of data, a <strong>tagged</strong> message <strong>MUST</strong> be sent on the data |
| stream if that message has a body (i.e. a Record Batch or Dictionary message). The |
| <a class="reference internal" href="#term-Tag"><span class="xref std std-term">tag</span></a> for each message should be structured as follows:</p> |
| <object data="../_images/mermaid-4a15cc28d43fb7ee5221b8108ca361508723820b.svg" type="image/svg+xml"> |
| <p class="warning">block-beta |
| columns 8 |
| |
| S["Sequence number\nBytes 0-3"]:4 |
| U["Unused (Reserved)\nBytes 4-6"]:3 |
| T["Message type\nByte 7"]:1</p></object> |
| <ul class="simple"> |
| <li><p>The <em>least significant</em> 4 bytes (bits 0 - 31) of the tag should be the unsigned 32-bit, little-endian sequence |
| number of the message.</p></li> |
| <li><p>The <em>most significant</em> byte (bits 56 - 63) of the tag indicates the message body <strong>type</strong> as an 8-bit |
| unsigned integer. Currently only two message types are specified, but more can be added as |
| needed to expand the protocol:</p> |
| <ol class="arabic simple" start="0"> |
| <li><p>The body contains the raw body buffer bytes as a packed buffer (i.e. the standard IPC |
| format body bytes)</p></li> |
| <li><p>The body contains a series of unsigned, little-endian 64-bit integer pairs to represent |
| either shared or remote memory, schematically structured as</p> |
| <ul> |
| <li><p>The first two integers (i.e. the first 16 bytes) represent the <em>total</em> size (in bytes) |
| of all buffers and the number of buffers in this message (and thus the number of following |
| pairs of <code class="docutils literal notranslate"><span class="pre">uint64</span></code>)</p></li> |
| <li><p>Each subsequent pair of <code class="docutils literal notranslate"><span class="pre">uint64</span></code> values is an address / offset followed by the length of |
| that particular buffer.</p></li> |
| </ul> |
| </li> |
| </ol> |
| </li> |
| <li><p>All unspecified bits (bits 32 - 55) of the tag are <em>reserved</em> for future use by potential updates |
| to this protocol. For now they <strong>MUST</strong> be 0.</p></li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Any shared/remote memory addresses that are sent across <strong>MUST</strong> be kept alive by the server |
| until a corresponding tagged <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> message is received. If the client disconnects |
| before sending any <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> messages, it can be assumed to be safe to clean up the memory |
| if desired by the server.</p> |
| </div> |
| <p>After sending the last tagged IPC body message, the server should maintain the connection and wait |
| for tagged <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> messages. The structure of these <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> messages is simple: |
| one or more unsigned, little-endian 64-bit integers which indicate the addresses/offsets that can |
| be freed.</p> |
| <p>Once there are no more outstanding addresses to be freed, the work for this stream is complete.</p> |
| </section> |
| </section> |
| <section id="client-sequence"> |
| <h3>Client Sequence<a class="headerlink" href="#client-sequence" title="Link to this heading">#</a></h3> |
| <p>A client for this protocol needs to concurrently handle both the data and metadata streams of |
| messages which may either both come from the same server or different servers. Below is a flowchart |
| showing how a client might handle the metadata and data streams:</p> |
| <object data="../_images/mermaid-8ff51316a5bfe716c8346df112ea33beaa5228f4.svg" type="image/svg+xml"> |
| <p class="warning">%% Licensed to the Apache Software Foundation (ASF) under one |
| %% or more contributor license agreements. See the NOTICE file |
| %% distributed with this work for additional information |
| %% regarding copyright ownership. The ASF licenses this file |
| %% to you under the Apache License, Version 2.0 (the |
| %% "License"); you may not use this file except in compliance |
| %% with the License. You may obtain a copy of the License at |
| |
| %% http://www.apache.org/licenses/LICENSE-2.0 |
| |
| %% Unless required by applicable law or agreed to in writing, |
| %% software distributed under the License is distributed on an |
| %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| %% KIND, either express or implied. See the License for the |
| %% specific language governing permissions and limitations |
| %% under the License. |
| |
| graph LR |
| client((Client))-->c1{{Send #60;want_data#gt; Msg}} |
| subgraph meta [Meta Message] |
| direction LR |
| m1[/Msg Type #40;byte 0#41;<br/>Seq Num #40;bytes 1-4#41;/]-- type 1 -->m2[[Process IPC Header]] |
| m2-- IPC has body -->m3[Get Corresponding<br/>Tagged Msg] |
| m2-- Schema Msg -->m4[/Store Schema/] |
| m1-- type 0 -->e[Indicate End of Stream] |
| end |
| subgraph data [Data Stream] |
| direction LR |
| d1[Request Msg<br/>for Seq Num]-->d2{Most Significant<br/>Byte} |
| d2-- 0 -->d3[Construct from<br/>Metadata and Body] |
| d2-- 1 -->d4[Get shared/remote<br/>buffers] |
| d4 -->d5[Construct from<br/>Metadata and buffers] |
| d3 & d5 -->e2[Output Batch] |
| end |
| |
| client -- recv untagged msg --> meta |
| client -- get tagged msg --> data</p></object> |
| <ol class="arabic simple"> |
| <li><p>First the client sends a tagged message using the <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> value it was provided in the |
| URI as the tag, and the opaque ID as the body.</p> |
| <ul class="simple"> |
| <li><p>If the metadata and data servers are separate, then a <code class="docutils literal notranslate"><span class="pre">&lt;want_data&gt;</span></code> message needs to be sent |
| separately to each.</p></li> |
| <li><p>In either scenario, the metadata and data streams can be processed concurrently and/or asynchronously |
| depending on the nature of the transports.</p></li> |
| </ul> |
| </li> |
| <li><p>For each <strong>untagged</strong> message the client receives in the metadata stream:</p> |
| <ul class="simple"> |
| <li><p>The first byte of the message indicates whether it is an <em>End of Stream</em> message (value <code class="docutils literal notranslate"><span class="pre">0</span></code>) |
| or a metadata message (value <code class="docutils literal notranslate"><span class="pre">1</span></code>).</p></li> |
| <li><p>The next 4 bytes are the sequence number of the message, an unsigned 32-bit integer in |
| little-endian byte order.</p></li> |
| <li><p>If it is <strong>not</strong> an <em>End of Stream</em> message, the remaining bytes are the IPC Flatbuffer bytes which |
| can be interpreted as normal.</p> |
| <ul> |
| <li><p>If the message has a body (i.e. Record Batch or Dictionary message) then the client should retrieve |
| a tagged message from the Data Stream using the same sequence number.</p></li> |
| </ul> |
| </li> |
| <li><p>If it <strong>is</strong> an <em>End of Stream</em> message, then it is safe to close the metadata connection if there are |
| no gaps in the sequence numbers received.</p></li> |
| </ul> |
| </li> |
| <li><p>When a metadata message that requires a body is received, the tag mask of <code class="docutils literal notranslate"><span class="pre">0x00000000FFFFFFFF</span></code> <strong>should</strong> |
| be used alongside the sequence number to match the message regardless of the higher bytes (i.e. we only |
| care about matching the lower 4 bytes to the sequence number)</p> |
| <ul class="simple"> |
| <li><p>Once received, the Most Significant Byte’s value determines how the client processes the body data:</p> |
| <ul> |
| <li><p>If the most significant byte is 0: Then the body of the message is the raw IPC packed body buffers |
| allowing it to easily be processed with the corresponding metadata header bytes.</p></li> |
| <li><p>If the most significant byte is 1: The body of the message will consist of a series of pairs of |
| unsigned, 64-bit integers in little-endian byte order.</p> |
| <ul> |
| <li><p>The first two integers represent <em>1)</em> the total size of all the body buffers together to allow |
| for easy allocation if an intermediate buffer is needed and <em>2)</em> the number of buffers being sent (<code class="docutils literal notranslate"><span class="pre">nbuf</span></code>).</p></li> |
| <li><p>The rest of the message will be <code class="docutils literal notranslate"><span class="pre">nbuf</span></code> pairs of integers, one for each buffer. Each pair is |
| <em>1)</em> the address / offset of the buffer and <em>2)</em> the length of that buffer. Memory can then be retrieved |
| via shared or remote memory routines based on the underlying transport. These addresses / offsets <strong>MUST</strong> |
| be retained so they can be sent back in <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> messages later, indicating to the server that |
| the client no longer needs the shared memory.</p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><p>Once an <em>End of Stream</em> message is received, the client should process any remaining un-processed |
| IPC metadata messages.</p></li> |
| <li><p>After individual memory addresses / offsets are able to be freed by the remote server (in the case where |
| it has sent these rather than the full body bytes), the client should send corresponding <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> messages |
| to the server.</p> |
| <ul class="simple"> |
| <li><p>A single <code class="docutils literal notranslate"><span class="pre">&lt;free_data&gt;</span></code> message consists of an arbitrary number of unsigned 64-bit integer values, representing |
| the addresses / offsets which can be freed. The reason for it being an <em>arbitrary number</em> is to allow a client |
| to choose whether to send multiple messages to free multiple addresses or to coalesce multiple addresses into |
| fewer messages to be freed (thus making the protocol less “chatty” if desired)</p></li> |
| </ul> |
| </li> |
| </ol> |
| </section> |
| </section> |
| <section id="continuing-development"> |
| <h2>Continuing Development<a class="headerlink" href="#continuing-development" title="Link to this heading">#</a></h2> |
| <p>If you decide to try this protocol in your own environments and systems, we’d love feedback and to learn about |
| your use case. As this is an <strong>experimental</strong> protocol currently, we need real-world usage in order to facilitate |
| improving it and finding the right generalizations to standardize on across transports.</p> |
| <p>Please chime in using the Arrow Developers Mailing list: <a class="reference external" href="https://arrow.apache.org/community/#mailing-lists">https://arrow.apache.org/community/#mailing-lists</a></p> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="StatisticsSchema.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Statistics schema</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="Flight.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Arrow Flight RPC</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rationale">Rationale</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#goals">Goals</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#definitions">Definitions</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-protocol">The Protocol</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#requirements">Requirements</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#uri-specification">URI Specification</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#handling-of-backpressure">Handling of Backpressure</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#protocol-description">Protocol Description</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#server-sequence">Server Sequence</a><ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#metadata-stream-sequence">Metadata Stream Sequence</a></li> |
| <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#data-stream-sequence">Data Stream Sequence</a></li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#client-sequence">Client Sequence</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#continuing-development">Continuing Development</a></li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/format/DissociatedIPC.rst"> |
| <i class="fa-solid fa-pencil"></i> |
| |
| |
| |
| Edit on GitHub |
| |
| |
| </a> |
| </div> |
| </div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script> |
| <script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2016-2025 Apache Software Foundation. |
| Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| <!-- # L10n: Setting the PST URL as an argument as this does not need to be localized --> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.1. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |