| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="../" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>Canonical Extension Types — Apache Arrow v24.0.0.dev316</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; |
| </script> |
| <!-- |
| This gives us a CSS class that is hidden only when JavaScript is disabled. |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" /> |
| <link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" /> |
| <link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css?v=8dcd28dc" /> |
| |
| <!-- So that users can add custom icons --> |
| <script defer src="../_static/scripts/fontawesome.js?digest=7f76b32a3354e82990f2"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2" /> |
| |
| <script src="../_static/documentation_options.js?v=63fde1ec"></script> |
| <script src="../_static/doctools.js?v=fd6eb6e6"></script> |
| <script src="../_static/sphinx_highlight.js?v=6ffebe34"></script> |
| <script src="../_static/clipboard.min.js?v=a7894cd8"></script> |
| <script src="../_static/copybutton.js?v=3bb21c8c"></script> |
| <script src="../_static/design-tabs.js?v=f930bc37"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'format/CanonicalExtensions';</script> |
| <script> |
| DOCUMENTATION_OPTIONS.theme_version = '0.17.0'; |
| DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json'; |
| DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/'; |
| DOCUMENTATION_OPTIONS.show_version_warning_banner = |
| true; |
| </script> |
| <script>DOCUMENTATION_OPTIONS.search_as_you_type = false;</script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/format/CanonicalExtensions.html" /> |
| <link rel="icon" href="../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Canonical Extension Examples" href="CanonicalExtensions/Examples.html" /> |
| <link rel="prev" title="Changing the Apache Arrow Format Specification" href="Changing.html" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="24.0.0.dev316" /> |
| |
| |
| <script src="../_static/searchtools.js"></script> |
| <script src="../_static/language_data.js"></script> |
| <script src="../searchindex.js"></script> |
| |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| <body data-default-mode=""> |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header id="pst-header" class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class=" navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v24.0.0.dev316 - Home"/> |
| <img src="../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v24.0.0.dev316 - Home"/> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class=" navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-2" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-2" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-2" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-2"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-3" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-3" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-3" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-3"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item pst-sidebar-collapse"><button id="pst-collapse-sidebar-button" aria-expanded="true" aria-controls="pst-primary-sidebar"><svg class="pst-icon svg-inline--fa" role="img" aria-hidden="true" focusable="false" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"> |
| <path fill="currentColor" d="M3 15.5C2.36232 15.5 1.74874 15.2564 1.28478 14.8189C0.820828 14.3815 0.541576 13.7832 0.504167 13.1467L0.5 13L0.5 3C0.499965 2.36232 0.743605 1.74874 1.18107 1.28478C1.61854 0.820828 2.21676 0.541576 2.85333 0.504167L3 0.5L13 0.5C13.6377 0.499965 14.2513 0.743605 14.7152 1.18107C15.1792 1.61854 15.4584 2.21676 15.4958 2.85333L15.5 3L15.5 13C15.5 13.6377 15.2564 14.2513 14.8189 14.7152C14.3815 15.1792 13.7832 15.4584 13.1467 15.4958L13 15.5L3 15.5ZM3 13.8333L10.5 13.8333L10.5 2.16667L3 2.16667C2.79589 2.16669 2.59889 2.24163 2.44636 2.37726C2.29383 2.5129 2.19638 2.69979 2.1725 2.9025L2.16667 3L2.16667 13C2.16669 13.2041 2.24163 13.4011 2.37726 13.5536C2.5129 13.7062 2.69979 13.8036 2.9025 13.8275L3 13.8333ZM6.65583 10.325L6.5775 10.2558L4.91083 8.58917C4.76735 8.44567 4.68116 8.25476 4.66843 8.05223C4.65569 7.84971 4.71729 7.6495 4.84167 7.48917L4.91083 7.41083L6.5775 5.74417C6.72747 5.59471 6.9287 5.50794 7.14032 5.50148C7.35194 5.49502 7.55809 5.56935 7.7169 5.70937C7.8757 5.8494 7.97525 6.04463 7.99533 6.25539C8.01541 6.46616 7.95451 6.67667 7.825 6.84417L7.75583 6.9225L6.67917 8L7.75583 9.0775C7.89931 9.22099 7.98551 9.41191 7.99824 9.61443C8.01097 9.81695 7.94938 10.0172 7.825 10.1775L7.75583 10.2558C7.61234 10.3993 7.42142 10.4855 7.2189 10.4982C7.01638 10.511 6.81617 10.4494 6.65583 10.325Z"/> |
| </svg> |
| <span class="pst-collapse-sidebar-label">Collapse Sidebar</span> |
| <span class="pst-expand-sidebar-label">Expand Sidebar</span> |
| </button></div> |
| <div class="sidebar-primary-item"> |
| |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="Intro.html">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li> |
| <li class="toctree-l1 current active has-children"><a class="current reference internal" href="#">Canonical Extension Types</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="CanonicalExtensions/Examples.html">Canonical Extension Examples</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="StatisticsSchema.html">Statistics schema</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="DissociatedIPC.html">Dissociated IPC Protocol</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Security.html">Security Considerations</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Integration.html">Integration Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Canonical Extension Types</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <section id="canonical-extension-types"> |
| <span id="format-canonical-extensions"></span><h1>Canonical Extension Types<a class="headerlink" href="#canonical-extension-types" title="Link to this heading">#</a></h1> |
| <section id="introduction"> |
| <h2>Introduction<a class="headerlink" href="#introduction" title="Link to this heading">#</a></h2> |
| <p>The Arrow columnar format allows defining |
| <a class="reference internal" href="Columnar.html#format-metadata-extension-types"><span class="std std-ref">extension types</span></a> so as to extend |
| standard Arrow data types with custom semantics. Often these semantics |
| will be specific to a system or application. However, it is beneficial |
| to share the definitions of well-known extension types so as to improve |
| interoperability between different systems integrating Arrow columnar data.</p> |
| <section id="standardization"> |
| <h3>Standardization<a class="headerlink" href="#standardization" title="Link to this heading">#</a></h3> |
| <p>These rules must be followed for the standardization of canonical extension |
| types:</p> |
| <ul class="simple"> |
| <li><p>Canonical extension types are described and maintained below in this document.</p></li> |
| <li><p>Each canonical extension type requires a distinct discussion and vote |
| on the <a class="reference external" href="https://arrow.apache.org/community/">Arrow development mailing-list</a>.</p></li> |
| <li><p>The specification text to be added <em>must</em> follow these requirements:</p> |
| <ol class="arabic simple"> |
| <li><p>It <em>must</em> define a well-defined extension name starting with “<code class="docutils literal notranslate"><span class="pre">arrow.</span></code>”.</p></li> |
| <li><p>Its parameters, if any, <em>must</em> be described in the proposal.</p></li> |
| <li><p>Its serialization <em>must</em> be described in the proposal and should |
| not require undue implementation work or unusual software dependencies |
| (for example, a trivial custom text format or a JSON-based format would be acceptable).</p></li> |
| <li><p>Its expected semantics <em>should</em> be described as well and any |
| potential ambiguities or pain points addressed or at least mentioned.</p></li> |
| </ol> |
| </li> |
| <li><p>The extension type <em>should</em> have one implementation submitted; |
| preferably two if non-trivial (for example if parameterized).</p></li> |
| </ul> |
| </section> |
| <section id="making-modifications"> |
| <h3>Making Modifications<a class="headerlink" href="#making-modifications" title="Link to this heading">#</a></h3> |
| <p>Like standard Arrow data types, canonical extension types should be considered |
| stable once standardized. Modifying a canonical extension type (for example |
| to expand the set of parameters) should be an exceptional event, follow the |
| same rules as laid out above, and provide backwards compatibility guarantees.</p> |
| </section> |
| </section> |
| <section id="official-list"> |
| <h2>Official List<a class="headerlink" href="#official-list" title="Link to this heading">#</a></h2> |
| <section id="fixed-shape-tensor"> |
| <span id="fixed-shape-tensor-extension"></span><h3>Fixed shape tensor<a class="headerlink" href="#fixed-shape-tensor" title="Link to this heading">#</a></h3> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.fixed_shape_tensor</span></code>.</p></li> |
| <li><p>The storage type of the extension: <code class="docutils literal notranslate"><span class="pre">FixedSizeList</span></code> where:</p> |
| <ul class="simple"> |
| <li><p><strong>value_type</strong> is the data type of individual tensor elements.</p></li> |
| <li><p><strong>list_size</strong> is the product of all the elements in tensor shape.</p></li> |
| </ul> |
| </li> |
| <li><p>Extension type parameters:</p> |
| <ul class="simple"> |
| <li><p><strong>value_type</strong> = the Arrow data type of individual tensor elements.</p></li> |
| <li><p><strong>shape</strong> = the physical shape of the contained tensors |
| as an array.</p></li> |
| </ul> |
| <p>Optional parameters describing the logical layout:</p> |
| <ul> |
| <li><p><strong>dim_names</strong> = explicit names for the tensor dimensions, |
| given as an array. Its length should be equal to the length of the shape, |
| that is, to the number of dimensions.</p> |
| <p><code class="docutils literal notranslate"><span class="pre">dim_names</span></code> can be used if the dimensions have well-known |
| names and they map to the physical layout (row-major).</p> |
| </li> |
| <li><p><strong>permutation</strong> = indices of the desired ordering of the |
| original dimensions, defined as an array.</p> |
| <p>The indices contain a permutation of the values [0, 1, .., N-1] where |
| N is the number of dimensions. The permutation indicates which |
| dimension of the logical layout corresponds to which dimension of the |
| physical tensor (the i-th dimension of the logical view corresponds |
| to the dimension with number <code class="docutils literal notranslate"><span class="pre">permutation[i]</span></code> of the physical tensor).</p> |
| <p>Permutation can be useful in case the logical order of |
| the tensor is a permutation of the physical order (row-major).</p> |
| <p>When the logical and physical layouts are equal, the permutation is always |
| [0, 1, .., N-1] and can therefore be left out.</p> |
| </li> |
| </ul> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>The metadata must be a valid JSON object including shape of |
| the contained tensors as an array with key <strong>“shape”</strong> plus optional |
| dimension names with key <strong>“dim_names”</strong> and ordering of the |
| dimensions with key <strong>“permutation”</strong>.</p> |
| <ul> |
| <li><p>Example: <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"shape":</span> <span class="pre">[2,</span> <span class="pre">5]}</span></code></p></li> |
| <li><p>Example with <code class="docutils literal notranslate"><span class="pre">dim_names</span></code> metadata for NCHW ordered data:</p> |
| <p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"shape":</span> <span class="pre">[100,</span> <span class="pre">200,</span> <span class="pre">500],</span> <span class="pre">"dim_names":</span> <span class="pre">["C",</span> <span class="pre">"H",</span> <span class="pre">"W"]}</span></code></p> |
| </li> |
| <li><p>Example of permuted 3-dimensional tensor:</p> |
| <p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"shape":</span> <span class="pre">[100,</span> <span class="pre">200,</span> <span class="pre">500],</span> <span class="pre">"permutation":</span> <span class="pre">[2,</span> <span class="pre">0,</span> <span class="pre">1]}</span></code></p> |
| <p>This is the physical layout shape and the shape of the logical |
| layout would in this case be <code class="docutils literal notranslate"><span class="pre">[500,</span> <span class="pre">100,</span> <span class="pre">200]</span></code>.</p> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Elements in a fixed shape tensor extension array are stored |
| in row-major/C-contiguous order.</p> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Other Data Structures in Arrow include a |
| <a class="reference external" href="https://arrow.apache.org/docs/format/Other.html">Tensor (Multi-dimensional Array)</a> |
| to be used as a message in the interprocess communication machinery (IPC).</p> |
| <p>This structure has no relationship with the Fixed shape tensor extension type defined |
| by this specification. Instead, this extension type lets one use fixed shape tensors |
| as elements in a field of a RecordBatch or a Table.</p> |
| </div> |
| </section> |
| <section id="variable-shape-tensor"> |
| <span id="variable-shape-tensor-extension"></span><h3>Variable shape tensor<a class="headerlink" href="#variable-shape-tensor" title="Link to this heading">#</a></h3> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.variable_shape_tensor</span></code>.</p></li> |
| <li><p>The storage type of the extension is: <code class="docutils literal notranslate"><span class="pre">StructArray</span></code> where struct |
| is composed of <strong>data</strong> and <strong>shape</strong> fields describing a single |
| tensor per row:</p> |
| <ul class="simple"> |
| <li><p><strong>data</strong> is a <code class="docutils literal notranslate"><span class="pre">List</span></code> holding tensor elements (each list element is |
| a single tensor). The List’s value type is the value type of the tensor, |
| such as an integer or floating-point type.</p></li> |
| <li><p><strong>shape</strong> is a <code class="docutils literal notranslate"><span class="pre">FixedSizeList<int32>[ndim]</span></code> of the tensor shape where |
| the size of the list <code class="docutils literal notranslate"><span class="pre">ndim</span></code> is equal to the number of dimensions of the |
| tensor.</p></li> |
| </ul> |
| </li> |
| <li><p>Extension type parameters:</p> |
| <ul class="simple"> |
| <li><p><strong>value_type</strong> = the Arrow data type of individual tensor elements.</p></li> |
| </ul> |
| <p>Optional parameters describing the logical layout:</p> |
| <ul> |
| <li><p><strong>dim_names</strong> = explicit names to tensor dimensions |
| as an array. The length of it should be equal to the shape |
| length and equal to the number of dimensions.</p> |
| <p><code class="docutils literal notranslate"><span class="pre">dim_names</span></code> can be used if the dimensions have well-known |
| names and they map to the physical layout (row-major).</p> |
| </li> |
| <li><p><strong>permutation</strong> = indices of the desired ordering of the |
| original dimensions, defined as an array.</p> |
| <p>The indices contain a permutation of the values [0, 1, .., N-1] where |
| N is the number of dimensions. The permutation indicates which |
| dimension of the logical layout corresponds to which dimension of the |
| physical tensor (the i-th dimension of the logical view corresponds |
| to the dimension with number <code class="docutils literal notranslate"><span class="pre">permutation[i]</span></code> of the physical tensor).</p> |
| <p>Permutation can be useful in case the logical order of |
| the tensor is a permutation of the physical order (row-major).</p> |
| <p>When logical and physical layout are equal, the permutation will always |
| be ([0, 1, .., N-1]) and can therefore be left out.</p> |
| </li> |
| <li><p><strong>uniform_shape</strong> = sizes of individual tensor’s dimensions which are |
| guaranteed to stay constant in uniform dimensions and can vary in |
| non-uniform dimensions. This holds over all tensors in the array. |
| Sizes in uniform dimensions are represented with int32 values, while |
| sizes of the non-uniform dimensions are not known in advance and are |
| represented with null. If <code class="docutils literal notranslate"><span class="pre">uniform_shape</span></code> is not provided it is assumed |
| that all dimensions are non-uniform. |
| An array containing a tensor with shape (2, 3, 4) and whose first and |
| last dimensions are uniform would have <code class="docutils literal notranslate"><span class="pre">uniform_shape</span></code> (2, null, 4). |
| This allows for interpreting the tensor correctly without accounting for |
| uniform dimensions while still permitting optional optimizations that |
| take advantage of the uniformity.</p></li> |
| </ul> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>The metadata must be a valid JSON object that optionally includes |
| dimension names with key <strong>“dim_names”</strong> and ordering of dimensions |
| with key <strong>“permutation”</strong>. |
| Shapes of tensors can be defined in a subset of dimensions by providing |
| key <strong>“uniform_shape”</strong>. |
| Minimal metadata is an empty string.</p> |
| <ul> |
| <li><p>Example with <code class="docutils literal notranslate"><span class="pre">dim_names</span></code> metadata for NCHW ordered data (note that the first |
| logical dimension, <code class="docutils literal notranslate"><span class="pre">N</span></code>, is mapped to the <strong>data</strong> List array: each element in the List |
| is a CHW tensor and the List of tensors implicitly constitutes a single NCHW tensor):</p> |
| <p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"dim_names":</span> <span class="pre">["C",</span> <span class="pre">"H",</span> <span class="pre">"W"]</span> <span class="pre">}</span></code></p> |
| </li> |
| <li><p>Example with <code class="docutils literal notranslate"><span class="pre">uniform_shape</span></code> metadata for a set of color images |
| with fixed height, variable width and three color channels:</p> |
| <p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"dim_names":</span> <span class="pre">["H",</span> <span class="pre">"W",</span> <span class="pre">"C"],</span> <span class="pre">"uniform_shape":</span> <span class="pre">[400,</span> <span class="pre">null,</span> <span class="pre">3]</span> <span class="pre">}</span></code></p> |
| </li> |
| <li><p>Example of permuted 3-dimensional tensor:</p> |
| <p><code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"permutation":</span> <span class="pre">[2,</span> <span class="pre">0,</span> <span class="pre">1]</span> <span class="pre">}</span></code></p> |
| <p>For example, if the physical <strong>shape</strong> of an individual tensor |
| is <code class="docutils literal notranslate"><span class="pre">[100,</span> <span class="pre">200,</span> <span class="pre">500]</span></code>, this permutation would denote a logical shape |
| of <code class="docutils literal notranslate"><span class="pre">[500,</span> <span class="pre">100,</span> <span class="pre">200]</span></code>.</p> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>With the exception of <code class="docutils literal notranslate"><span class="pre">permutation</span></code>, the parameters and storage |
| of VariableShapeTensor relate to the <em>physical</em> storage of the tensor.</p> |
| <dl class="simple"> |
| <dt>For example, consider a tensor with:</dt><dd><p>shape = [10, 20, 30] |
| dim_names = [x, y, z] |
| permutation = [2, 0, 1]</p> |
| </dd> |
| </dl> |
| <p>This means the logical tensor has names [z, x, y] and shape [30, 10, 20].</p> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Elements in a variable shape tensor extension array are stored |
| in row-major/C-contiguous order.</p> |
| </div> |
| </section> |
| <section id="json"> |
| <span id="json-extension"></span><h3>JSON<a class="headerlink" href="#json" title="Link to this heading">#</a></h3> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.json</span></code>.</p></li> |
| <li><p>The storage type of this extension is <code class="docutils literal notranslate"><span class="pre">String</span></code> or |
| <code class="docutils literal notranslate"><span class="pre">LargeString</span></code> or <code class="docutils literal notranslate"><span class="pre">StringView</span></code>. |
| Only UTF-8 encoded JSON as specified in <a class="reference external" href="https://datatracker.ietf.org/doc/html/rfc8259">RFC 8259</a> is supported.</p></li> |
| <li><p>Extension type parameters:</p> |
| <p>This type does not have any parameters.</p> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>Metadata is either an empty string or a JSON string with an empty object. |
| In the future, additional fields may be added, but they are not required |
| to interpret the array.</p> |
| </li> |
| </ul> |
| </section> |
| <section id="uuid"> |
| <span id="uuid-extension"></span><h3>UUID<a class="headerlink" href="#uuid" title="Link to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.uuid</span></code>.</p></li> |
| <li><p>The storage type of the extension is <code class="docutils literal notranslate"><span class="pre">FixedSizeBinary</span></code> with a length of 16 bytes.</p></li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>A specific UUID version is not required or guaranteed. This extension represents |
| UUIDs as FixedSizeBinary(16) with big-endian notation and does not interpret the bytes in any way.</p> |
| </div> |
| </section> |
| <section id="opaque"> |
| <span id="opaque-extension"></span><h3>Opaque<a class="headerlink" href="#opaque" title="Link to this heading">#</a></h3> |
| <p>Opaque represents a type that an Arrow-based system received from an external |
| (often non-Arrow) system, but that it cannot interpret. In this case, it can |
| pass on Opaque to its clients to at least show that a field exists and |
| preserve metadata about the type from the other system.</p> |
| <p>Extension parameters:</p> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.opaque</span></code>.</p></li> |
| <li><p>The storage type of this extension is any type. If there is no underlying |
| data, the storage type should be Null.</p></li> |
| <li><p>Extension type parameters:</p> |
| <ul class="simple"> |
| <li><p><strong>type_name</strong> = the name of the unknown type in the external system.</p></li> |
| <li><p><strong>vendor_name</strong> = the name of the external system.</p></li> |
| </ul> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>A valid JSON object containing the parameters as fields. In the future, |
| additional fields may be added, but all fields current and future are never |
| required to interpret the array.</p> |
| <p>Developers <strong>should not</strong> attempt to enable public semantic interoperability |
| of Opaque by canonicalizing specific values of these parameters.</p> |
| </li> |
| </ul> |
| <section id="rationale"> |
| <h4>Rationale<a class="headerlink" href="#rationale" title="Link to this heading">#</a></h4> |
| <p>Interfacing with non-Arrow systems requires a way to handle data that doesn’t |
| have an equivalent Arrow type. In this case, use the Opaque type, which |
| explicitly represents an unsupported field. Other solutions are inadequate:</p> |
| <ul class="simple"> |
| <li><p>Raising an error means even one unsupported field makes all operations |
| impossible, even if (for instance) the user is just trying to view a schema.</p></li> |
| <li><p>Dropping unsupported columns misleads the user as to the actual schema.</p></li> |
| <li><p>An extension type may not exist for the unsupported type.</p></li> |
| <li><p>Generating an extension type on the fly would falsely imply support.</p></li> |
| </ul> |
| <p>Applications <strong>should not</strong> make conventions around vendor_name and type_name. |
| These parameters are meant for human end users to understand what type wasn’t |
| supported. Applications may try to interpret these fields, but must be |
| prepared for breakage (e.g., when the type becomes supported with a custom |
| extension type later on). Similarly, <strong>Opaque is not a generic container for |
| file formats</strong>. Considerations such as MIME types are irrelevant. In both of |
| these cases, create a custom extension type instead.</p> |
| <p>Examples:</p> |
| <ul> |
| <li><p>A Flight SQL service that supports connecting external databases may |
| encounter columns with unsupported types in external tables. In this case, |
| it can use the Opaque[Null] type to at least report that a column exists |
| with a particular name and type name. This lets clients know that a column |
| exists, but is not supported. Null is used as the storage type here because |
| only schemas are involved.</p> |
| <p>An example of the extension metadata would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s2">"type_name"</span><span class="p">:</span> <span class="s2">"varray"</span><span class="p">,</span> <span class="s2">"vendor_name"</span><span class="p">:</span> <span class="s2">"Oracle"</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><p>The ADBC PostgreSQL driver gets results as a series of length-prefixed byte |
| fields. But the driver will not always know how to parse the bytes, as |
| there may be extensions (e.g. PostGIS). It can use Opaque[Binary] to still |
| return those bytes to the application, which may be able to parse the data |
| itself. Opaque differentiates the column from an actual binary column and |
| makes it clear that the value is directly from PostgreSQL. (A custom |
| extension type is preferred, but there will always be extensions that the |
| driver does not know about.)</p> |
| <p>An example of the extension metadata would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s2">"type_name"</span><span class="p">:</span> <span class="s2">"geometry"</span><span class="p">,</span> <span class="s2">"vendor_name"</span><span class="p">:</span> <span class="s2">"PostGIS"</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><p>The ADBC PostgreSQL driver may also know how to parse the bytes, but not |
| know the intended semantics. For example, <a class="reference external" href="https://www.postgresql.org/docs/current/rowtypes.html">composite types</a> can add new |
| semantics to existing types, somewhat like Arrow extension types. The |
| driver would be able to parse the underlying bytes in this case, but would |
| still use the Opaque type.</p> |
| <p>Consider the example in the PostgreSQL documentation of a <code class="docutils literal notranslate"><span class="pre">complex</span></code> type. |
| Mapping the type to a plain Arrow <code class="docutils literal notranslate"><span class="pre">struct</span></code> type would lose meaning, just |
| like how an Arrow system deciding to treat all extension types by dropping |
| the extension metadata would be undesirable. Instead, the driver can use |
| Opaque[Struct] to pass on the composite type info. (It would be wrong to |
| try to map this to an Arrow-defined complex type: it does not know the |
| proper semantics of a user-defined type, which cannot and should not be |
| hardcoded into the driver in the first place.)</p> |
| <p>An example of the extension metadata would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s2">"type_name"</span><span class="p">:</span> <span class="s2">"database_name.schema_name.complex"</span><span class="p">,</span> <span class="s2">"vendor_name"</span><span class="p">:</span> <span class="s2">"PostgreSQL"</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><p>The JDBC adapter in the Arrow Java libraries converts JDBC result sets into |
| Arrow arrays, and can get Arrow schemas from result sets. JDBC, however, |
| allows drivers to return <a class="reference external" href="https://docs.oracle.com/javase/8/docs/api/java/sql/Types.html#OTHER">arbitrary Java objects</a>.</p> |
| <p>The driver can use Opaque[Null] as a placeholder during schema conversion, |
| only erroring if the application tries to fetch the actual data. That way, |
| clients can at least introspect result schemas to decide whether they can |
| proceed to fetch the data, or only query certain columns.</p> |
| <p>An example of the extension metadata would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s2">"type_name"</span><span class="p">:</span> <span class="s2">"OTHER"</span><span class="p">,</span> <span class="s2">"vendor_name"</span><span class="p">:</span> <span class="s2">"JDBC driver name"</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </section> |
| </section> |
| <section id="bit-boolean"> |
| <h3>8-bit Boolean<a class="headerlink" href="#bit-boolean" title="Link to this heading">#</a></h3> |
| <p>Bool8 represents a boolean value using 1 byte (8 bits) to store each value instead of only 1 bit as in |
| the original Arrow Boolean type. Although less compact than the original representation, Bool8 may have |
| better zero-copy compatibility with various systems that also store booleans using 1 byte.</p> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.bool8</span></code>.</p></li> |
| <li><p>The storage type of this extension is <code class="docutils literal notranslate"><span class="pre">Int8</span></code> where:</p> |
| <ul class="simple"> |
| <li><p><strong>false</strong> is denoted by the value <code class="docutils literal notranslate"><span class="pre">0</span></code>.</p></li> |
| <li><p><strong>true</strong> can be specified using any non-zero value. Preferably <code class="docutils literal notranslate"><span class="pre">1</span></code>.</p></li> |
| </ul> |
| </li> |
| <li><p>Extension type parameters:</p> |
| <p>This type does not have any parameters.</p> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>Metadata is an empty string.</p> |
| </li> |
| </ul> |
| </section> |
| <section id="parquet-variant"> |
| <span id="parquet-variant-extension"></span><h3>Parquet Variant<a class="headerlink" href="#parquet-variant" title="Link to this heading">#</a></h3> |
| <p>Variant represents a value that may be one of:</p> |
| <ul class="simple"> |
| <li><p>Primitive: a type and corresponding value (e.g. <code class="docutils literal notranslate"><span class="pre">INT</span></code>, <code class="docutils literal notranslate"><span class="pre">STRING</span></code>)</p></li> |
| <li><p>Array: An ordered list of Variant values</p></li> |
| <li><p>Object: An unordered collection of string/Variant pairs (i.e. key/value pairs). An object may not contain duplicate keys</p></li> |
| </ul> |
| <p>Particularly, this provides a way to represent semi-structured data which is stored as a |
| <a class="reference external" href="https://github.com/apache/parquet-format/blob/master/VariantEncoding.md">Parquet Variant</a> value within Arrow columns in |
| a lossless fashion. This also provides the ability to represent <a class="reference external" href="https://github.com/apache/parquet-format/blob/master/VariantShredding.md">shredded</a> |
| variant values. The canonical extension type allows systems to pass Variant encoded data around without special handling unless |
| they want to directly interact with the encoded variant data. See the Parquet format specification for details on what the actual |
| binary values look like.</p> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.parquet.variant</span></code>.</p></li> |
| <li><p>The storage type of this extension is a <code class="docutils literal notranslate"><span class="pre">Struct</span></code> that obeys the following rules:</p> |
| <ul class="simple"> |
| <li><p>A <em>non-nullable</em> field named <code class="docutils literal notranslate"><span class="pre">metadata</span></code> which is of type <code class="docutils literal notranslate"><span class="pre">Binary</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeBinary</span></code>, or <code class="docutils literal notranslate"><span class="pre">BinaryView</span></code>.</p></li> |
| <li><p>At least one (or both) of the following:</p> |
| <ul> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">value</span></code> which is of type <code class="docutils literal notranslate"><span class="pre">Binary</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeBinary</span></code>, or <code class="docutils literal notranslate"><span class="pre">BinaryView</span></code>. |
| (unshredded variants consist of only the <code class="docutils literal notranslate"><span class="pre">metadata</span></code> and <code class="docutils literal notranslate"><span class="pre">value</span></code> fields)</p></li> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">typed_value</span></code> which can be one of the types listed in <a class="reference internal" href="#variant-primitive-type-mapping"><span class="std std-ref">Primitive Type Mappings</span></a>, or a <code class="docutils literal notranslate"><span class="pre">List</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeList</span></code>, <code class="docutils literal notranslate"><span class="pre">ListView</span></code> or <code class="docutils literal notranslate"><span class="pre">Struct</span></code></p> |
| <ul> |
| <li><p>If the <code class="docutils literal notranslate"><span class="pre">typed_value</span></code> field is a <code class="docutils literal notranslate"><span class="pre">List</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeList</span></code> or <code class="docutils literal notranslate"><span class="pre">ListView</span></code> its elements <strong>must</strong> be <em>non-nullable</em> and <strong>must</strong> |
| be a <code class="docutils literal notranslate"><span class="pre">Struct</span></code> consisting of at least one (or both) of the following:</p> |
| <ul> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">value</span></code> which is of type <code class="docutils literal notranslate"><span class="pre">Binary</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeBinary</span></code>, or <code class="docutils literal notranslate"><span class="pre">BinaryView</span></code>.</p></li> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">typed_value</span></code> which follows the rules outlined above (this allows for arbitrarily nested data).</p></li> |
| </ul> |
| </li> |
| <li><p>If the <code class="docutils literal notranslate"><span class="pre">typed_value</span></code> field is a <code class="docutils literal notranslate"><span class="pre">Struct</span></code>, then its fields <strong>must</strong> be <em>non-nullable</em>, representing the fields being shredded |
| from the objects, and <strong>must</strong> be a <code class="docutils literal notranslate"><span class="pre">Struct</span></code> consisting of at least one (or both) of the following:</p> |
| <ul> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">value</span></code> which is of type <code class="docutils literal notranslate"><span class="pre">Binary</span></code>, <code class="docutils literal notranslate"><span class="pre">LargeBinary</span></code>, or <code class="docutils literal notranslate"><span class="pre">BinaryView</span></code>.</p></li> |
| <li><p>A field named <code class="docutils literal notranslate"><span class="pre">typed_value</span></code> which follows the rules outlined above (this allows for arbitrarily nested data).</p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><p>Extension type parameters:</p> |
| <p>This type does not have any parameters.</p> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>Extension metadata is an empty string.</p> |
| </li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>It is also <em>permissible</em> for the <code class="docutils literal notranslate"><span class="pre">metadata</span></code> field to be dictionary-encoded with a preferred (<em>but not required</em>) index type of <code class="docutils literal notranslate"><span class="pre">int8</span></code>, |
| or run-end-encoded with a preferred (<em>but not required</em>) runs type of <code class="docutils literal notranslate"><span class="pre">int8</span></code>.</p> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>The fields may be in any order, and thus must be accessed by <strong>name</strong> not by <em>position</em>. The field names are case sensitive.</p> |
| </div> |
| <section id="primitive-type-mappings"> |
| <span id="variant-primitive-type-mapping"></span><h4>Primitive Type Mappings<a class="headerlink" href="#primitive-type-mappings" title="Link to this heading">#</a></h4> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Arrow Primitive Type</p></th> |
| <th class="head"><p>Variant Primitive Type</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>Null</p></td> |
| <td><p>Null</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Boolean</p></td> |
| <td><p>Boolean (true/false)</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Int8</p></td> |
| <td><p>Int8</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Uint8</p></td> |
| <td><p>Int16</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Int16</p></td> |
| <td><p>Int16</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Uint16</p></td> |
| <td><p>Int32</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Int32</p></td> |
| <td><p>Int32</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Uint32</p></td> |
| <td><p>Int64</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Int64</p></td> |
| <td><p>Int64</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Float</p></td> |
| <td><p>Float</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Double</p></td> |
| <td><p>Double</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Decimal32</p></td> |
| <td><p>decimal4</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Decimal64</p></td> |
| <td><p>decimal8</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Decimal128</p></td> |
| <td><p>decimal16</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Date32</p></td> |
| <td><p>Date</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Time64</p></td> |
| <td><p>TimeNTZ</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Timestamp(us, UTC)</p></td> |
| <td><p>Timestamp (micro)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Timestamp(us)</p></td> |
| <td><p>TimestampNTZ (micro)</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Timestamp(ns, UTC)</p></td> |
| <td><p>Timestamp (nano)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>Timestamp(ns)</p></td> |
| <td><p>TimestampNTZ (nano)</p></td> |
| </tr> |
| <tr class="row-even"><td><p>Binary</p></td> |
| <td><p>Binary</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>LargeBinary</p></td> |
| <td><p>Binary</p></td> |
| </tr> |
| <tr class="row-even"><td><p>BinaryView</p></td> |
| <td><p>Binary</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>String</p></td> |
| <td><p>String</p></td> |
| </tr> |
| <tr class="row-even"><td><p>LargeString</p></td> |
| <td><p>String</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>StringView</p></td> |
| <td><p>String</p></td> |
| </tr> |
| <tr class="row-even"><td><p>UUID extension type</p></td> |
| <td><p>UUID</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </section> |
| </section> |
| <section id="timestamp-with-offset"> |
| <span id="timestamp-with-offset-extension"></span><h3>Timestamp With Offset<a class="headerlink" href="#timestamp-with-offset" title="Link to this heading">#</a></h3> |
| <p>This type represents a timestamp column that stores potentially different timezone offsets per value. The timestamp is stored in UTC alongside the original timezone offset in minutes. |
| This extension type is intended to be compatible with ANSI SQL’s <code class="docutils literal notranslate"><span class="pre">TIMESTAMP</span> <span class="pre">WITH</span> <span class="pre">TIME</span> <span class="pre">ZONE</span></code>, which is supported by multiple database engines.</p> |
| <ul> |
| <li><p>Extension name: <code class="docutils literal notranslate"><span class="pre">arrow.timestamp_with_offset</span></code>.</p></li> |
| <li><p>The storage type of the extension is a <code class="docutils literal notranslate"><span class="pre">Struct</span></code> with 2 fields, in order:</p> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">timestamp</span></code>: a non-nullable <code class="docutils literal notranslate"><span class="pre">Timestamp(time_unit,</span> <span class="pre">"UTC")</span></code>, where <code class="docutils literal notranslate"><span class="pre">time_unit</span></code> is any Arrow <code class="docutils literal notranslate"><span class="pre">TimeUnit</span></code> (s, ms, us or ns).</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">offset_minutes</span></code>: a non-nullable signed 16-bit integer (<code class="docutils literal notranslate"><span class="pre">Int16</span></code>) representing the offset in minutes from the UTC timezone. Negative offsets represent time zones west of UTC, while positive offsets represent east. Offsets normally range from -779 (-12:59) to +780 (+13:00).</p></li> |
| </ul> |
| </li> |
| <li><p>Extension type parameters:</p> |
| <p>This type does not have any parameters.</p> |
| </li> |
| <li><p>Description of the serialization:</p> |
| <p>Extension metadata is an empty string.</p> |
| </li> |
| </ul> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>It is also <em>permissible</em> for the <code class="docutils literal notranslate"><span class="pre">offset_minutes</span></code> field to be dictionary-encoded or run-end-encoded.</p> |
| </div> |
| </section> |
| <section id="community-extension-types"> |
| <h3>Community Extension Types<a class="headerlink" href="#community-extension-types" title="Link to this heading">#</a></h3> |
| <p>In addition to the canonical extension types listed above, there exist Arrow |
| extension types that have been established as standards within specific domain |
| areas. These have not been officially designated as canonical through a |
| discussion and vote on the Arrow development mailing list but are well known |
| within subcommunities of Arrow developers.</p> |
| </section> |
| <section id="geoarrow"> |
| <h3>GeoArrow<a class="headerlink" href="#geoarrow" title="Link to this heading">#</a></h3> |
| <p><a class="reference external" href="https://github.com/geoarrow/geoarrow">GeoArrow</a> defines a collection of |
| Arrow extension types for representing vector geometries. It is well known |
| within the Arrow geospatial subcommunity. The GeoArrow specification is not yet |
| finalized.</p> |
| <div class="toctree-wrapper compound"> |
| </div> |
| </section> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="Changing.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Changing the Apache Arrow Format Specification</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="CanonicalExtensions/Examples.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Canonical Extension Examples</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <!-- "On this page" table of contents: the heading div labels the nav (aria-labelledby), and each entry links to an in-page section anchor --> |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list" aria-hidden="true"></i> On this page |
| </div> |
| <nav id="pst-page-toc-nav" class="page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="pst-show_toc_level nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a><ul class="pst-show_toc_level nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#standardization">Standardization</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#making-modifications">Making Modifications</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#official-list">Official List</a><ul class="pst-show_toc_level nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fixed-shape-tensor">Fixed shape tensor</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#variable-shape-tensor">Variable shape tensor</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#json">JSON</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#uuid">UUID</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#opaque">Opaque</a><ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#rationale">Rationale</a></li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#bit-boolean">8-bit Boolean</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#parquet-variant">Parquet Variant</a><ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#primitive-type-mappings">Primitive Type Mappings</a></li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#timestamp-with-offset">Timestamp With Offset</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#community-extension-types">Community Extension Types</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#geoarrow">GeoArrow</a></li> |
| </ul> |
| </li> |
| </ul> |
| </nav></div> |
| |
| <!-- "Edit on GitHub" link to this page's reStructuredText source; the pencil icon is decorative, the link text carries the meaning --> |
| <div class="sidebar-secondary-item"> |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/format/CanonicalExtensions.rst"> |
| <i class="fa-solid fa-pencil" aria-hidden="true"></i> |
| Edit on GitHub |
| </a> |
| </div> |
| </div> |
| |
| </div></div> |
| |
| |
| </div> |
| <!-- Content-area footer; it contains no items on this page --> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Theme scripts placed after the main page content and marked defer, so fetching them does not block HTML parsing; they execute in document order once parsing finishes --> |
| <script defer src="../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2"></script> |
| <script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2"></script> |
| |
| <!-- Site footer: copyright/trademark notice and Sphinx version at the start, theme version at the end --> |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <!-- Start group: legal notice and documentation generator version --> |
| <div class="footer-items__start"> |
| |
| <!-- Copyright and trademark notice --> |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2016-2026 Apache Software Foundation. |
| Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <!-- Sphinx version used to build this page --> |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 9.1.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <!-- End group: theme attribution and version --> |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| <!-- Translator (L10n) note carried over from the theme template: the theme URL is passed as an argument because it does not need to be localized --> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.17.0. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |