| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="../../" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>Canonical Extension Examples — Apache Arrow v25.0.0.dev2</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; |
| </script> |
| <!-- |
| This gives us a CSS class that will be invisible only if JS is disabled |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../../_static/styles/theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" /> |
| <link href="../../_static/styles/pydata-sphinx-theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=03e43079" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/sphinx-design.min.css?v=95c83b7e" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css?v=8dcd28dc" /> |
| |
| <!-- So that users can add custom icons --> |
| <script defer src="../../_static/scripts/fontawesome.js?digest=7f76b32a3354e82990f2"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2" /> |
| <link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2" /> |
| |
| <script src="../../_static/documentation_options.js?v=30184a23"></script> |
| <script src="../../_static/doctools.js?v=fd6eb6e6"></script> |
| <script src="../../_static/sphinx_highlight.js?v=6ffebe34"></script> |
| <script src="../../_static/clipboard.min.js?v=a7894cd8"></script> |
| <script src="../../_static/copybutton.js?v=3bb21c8c"></script> |
| <script src="../../_static/design-tabs.js?v=f930bc37"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'format/CanonicalExtensions/Examples';</script> |
| <script> |
| DOCUMENTATION_OPTIONS.theme_version = '0.17.0'; |
| DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json'; |
| DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/'; |
| DOCUMENTATION_OPTIONS.show_version_warning_banner = |
| true; |
| </script> |
| <script>DOCUMENTATION_OPTIONS.search_as_you_type = false;</script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/format/CanonicalExtensions/Examples.html" /> |
| <link rel="icon" href="../../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../../genindex.html" /> |
| <link rel="search" title="Search" href="../../search.html" /> |
| <link rel="next" title="Other Data Structures" href="../Other.html" /> |
| <link rel="prev" title="Canonical Extension Types" href="../CanonicalExtensions.html" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="25.0.0.dev2" /> |
| |
| |
| <script src="../../_static/searchtools.js"></script> |
| <script src="../../_static/language_data.js"></script> |
| <script src="../../searchindex.js"></script> |
| |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| <body data-default-mode=""> |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header id="pst-header" class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class=" navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v25.0.0.dev2 - Home"/> |
| <img src="../../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v25.0.0.dev2 - Home"/> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class=" navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="../index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-2" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-2" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-2" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-2"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="../index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-3" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-3" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-3" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-3"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item pst-sidebar-collapse"><button id="pst-collapse-sidebar-button" aria-expanded="true" aria-controls="pst-primary-sidebar"><svg class="pst-icon svg-inline--fa" role="img" aria-hidden="true" focusable="false" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"> |
| <path fill="currentColor" d="M3 15.5C2.36232 15.5 1.74874 15.2564 1.28478 14.8189C0.820828 14.3815 0.541576 13.7832 0.504167 13.1467L0.5 13L0.5 3C0.499965 2.36232 0.743605 1.74874 1.18107 1.28478C1.61854 0.820828 2.21676 0.541576 2.85333 0.504167L3 0.5L13 0.5C13.6377 0.499965 14.2513 0.743605 14.7152 1.18107C15.1792 1.61854 15.4584 2.21676 15.4958 2.85333L15.5 3L15.5 13C15.5 13.6377 15.2564 14.2513 14.8189 14.7152C14.3815 15.1792 13.7832 15.4584 13.1467 15.4958L13 15.5L3 15.5ZM3 13.8333L10.5 13.8333L10.5 2.16667L3 2.16667C2.79589 2.16669 2.59889 2.24163 2.44636 2.37726C2.29383 2.5129 2.19638 2.69979 2.1725 2.9025L2.16667 3L2.16667 13C2.16669 13.2041 2.24163 13.4011 2.37726 13.5536C2.5129 13.7062 2.69979 13.8036 2.9025 13.8275L3 13.8333ZM6.65583 10.325L6.5775 10.2558L4.91083 8.58917C4.76735 8.44567 4.68116 8.25476 4.66843 8.05223C4.65569 7.84971 4.71729 7.6495 4.84167 7.48917L4.91083 7.41083L6.5775 5.74417C6.72747 5.59471 6.9287 5.50794 7.14032 5.50148C7.35194 5.49502 7.55809 5.56935 7.7169 5.70937C7.8757 5.8494 7.97525 6.04463 7.99533 6.25539C8.01541 6.46616 7.95451 6.67667 7.825 6.84417L7.75583 6.9225L6.67917 8L7.75583 9.0775C7.89931 9.22099 7.98551 9.41191 7.99824 9.61443C8.01097 9.81695 7.94938 10.0172 7.825 10.1775L7.75583 10.2558C7.61234 10.3993 7.42142 10.4855 7.2189 10.4982C7.01638 10.511 6.81617 10.4494 6.65583 10.325Z"/> |
| </svg> |
| <span class="pst-collapse-sidebar-label">Collapse Sidebar</span> |
| <span class="pst-expand-sidebar-label">Expand Sidebar</span> |
| </button></div> |
| <div class="sidebar-primary-item"> |
| |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="../Intro.html">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Columnar.html">Arrow Columnar Format</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Versioning.html">Format Versioning and Stability</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Changing.html">Changing the Apache Arrow Format Specification</a></li> |
| <li class="toctree-l1 current active has-children"><a class="reference internal" href="../CanonicalExtensions.html">Canonical Extension Types</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current"> |
| <li class="toctree-l2 current active"><a class="current reference internal" href="#">Canonical Extension Examples</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Other.html">Other Data Structures</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="../CDataInterface.html">The Arrow C data interface</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="../CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="../CStreamInterface.html">The Arrow C stream interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../CDeviceDataInterface.html">The Arrow C Device data interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../StatisticsSchema.html">Statistics schema</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../DissociatedIPC.html">Dissociated IPC Protocol</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../FlightSql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../ADBC.html">ADBC: Arrow Database Connectivity</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Security.html">Security Considerations</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Integration.html">Integration Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../Glossary.html">Glossary</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="../index.html" class="nav-link">Specifications</a></li> |
| |
| |
| <li class="breadcrumb-item"><a href="../CanonicalExtensions.html" class="nav-link">Canonical Extension Types</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Canonical Extension Examples</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <section id="canonical-extension-examples"> |
| <span id="format-canonical-extension-examples"></span><h1>Canonical Extension Examples<a class="headerlink" href="#canonical-extension-examples" title="Link to this heading">#</a></h1> |
| <section id="parquet-variant-extension"> |
| <h2>Parquet Variant Extension<a class="headerlink" href="#parquet-variant-extension" title="Link to this heading">#</a></h2> |
| <section id="unshredded"> |
| <h3>Unshredded<a class="headerlink" href="#unshredded" title="Link to this heading">#</a></h3> |
| <p>In the simplest case, an unshredded variant always consists of <strong>exactly</strong> two fields: <code class="docutils literal notranslate"><span class="pre">metadata</span></code> and <code class="docutils literal notranslate"><span class="pre">value</span></code>. Any of |
| the following storage types are valid (not an exhaustive list):</p> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">struct&lt;metadata:</span> <span class="pre">binary</span> <span class="pre">non-nullable,</span> <span class="pre">value:</span> <span class="pre">binary</span> <span class="pre">nullable&gt;</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">struct&lt;value:</span> <span class="pre">binary</span> <span class="pre">nullable,</span> <span class="pre">metadata:</span> <span class="pre">binary</span> <span class="pre">non-nullable&gt;</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">struct&lt;metadata:</span> <span class="pre">dictionary&lt;int8,</span> <span class="pre">binary&gt;</span> <span class="pre">non-nullable,</span> <span class="pre">value:</span> <span class="pre">binary_view</span> <span class="pre">nullable&gt;</span></code></p></li> |
| </ul> |
| </section> |
| <section id="simple-shredding"> |
| <h3>Simple Shredding<a class="headerlink" href="#simple-shredding" title="Link to this heading">#</a></h3> |
| <p>Suppose we have a Variant field named <em>measurement</em> and we want to shred the <code class="docutils literal notranslate"><span class="pre">int64</span></code> values into a separate column for efficiency. |
| In Parquet, this could be represented as:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">required</span> <span class="n">group</span> <span class="n">measurement</span> <span class="p">(</span><span class="n">VARIANT</span><span class="p">)</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">binary</span> <span class="n">metadata</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">int64</span> <span class="n">typed_value</span><span class="p">;</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Thus the corresponding storage type for the <code class="docutils literal notranslate"><span class="pre">arrow.parquet.variant</span></code> Arrow extension type would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o">&lt;</span> |
| <span class="n">metadata</span><span class="p">:</span> <span class="n">binary</span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">int64</span> <span class="n">nullable</span> |
| <span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <p>If we suppose a series of measurements consisting of:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">34</span><span class="p">,</span> <span class="n">null</span><span class="p">,</span> <span class="s2">"n/a"</span><span class="p">,</span> <span class="mi">100</span> |
| </pre></div> |
| </div> |
| <p>The data should be stored/represented in Arrow as:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>* Length: 4, Null count: 1 |
| * Validity Bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|---------------| |
| | 00001011 | 0 (padding) | |
| |
| * Children arrays: |
| * field-0 array (`VarBinary`) |
| * Length: 4, Null count: 0 |
| * Offsets buffer: |
| |
| | Bytes 0-19 | Bytes 20-63 | |
| |------------------|--------------------------| |
| | 0, 2, 4, 6, 8 | unspecified (padding) | |
| |
| * Value buffer: (01 00 -> indicates version 1 empty metadata) |
| |
| | Bytes 0-7 | Bytes 8-63 | |
| |-------------------------|--------------------------| |
| | 01 00 01 00 01 00 01 00 | unspecified (padding) | |
| |
| * field-1 array (`VarBinary`) |
| * Length: 4, Null count: 2 |
| * Validity Bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|---------------| |
| | 00000110 | 0 (padding) | |
| |
| * Offsets buffer: |
| |
| | Bytes 0-19 | Bytes 20-63 | |
| |------------------|--------------------------| |
| | 0, 0, 1, 5, 5 | unspecified (padding) | |
| |
| * Value buffer: (`00` -> null, |
| `0x0D 0x6E 0x2F 0x61` -> variant encoding literal string "n/a") |
| |
| | Bytes 0-4 | Bytes 5-63 | |
| |------------------------|--------------------------| |
| | 00 0x0D 0x6E 0x2F 0x61 | unspecified (padding) | |
| |
| * field-2 array (int64 array) |
| * Length: 4, Null count: 2 |
| * Validity Bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|---------------| |
| | 00001001 | 0 (padding) | |
| |
| * Value buffer: |
| |
| | Bytes 0-31 | Bytes 32-63 | |
| |---------------------|--------------------------| |
| | 34, 00, 00, 100 | unspecified (padding) | |
| </pre></div> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Notice that there is a variant <code class="docutils literal notranslate"><span class="pre">literal</span> <span class="pre">null</span></code> in the <code class="docutils literal notranslate"><span class="pre">value</span></code> array. This is required by the |
| <a class="reference external" href="https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding">shredding specification</a> |
| so that a consumer can tell the difference between a <em>missing</em> field and a <em>null</em> field. A null |
| element must be encoded as a Variant null: <em>basic type</em> <code class="docutils literal notranslate"><span class="pre">0</span></code> (primitive) and <em>physical type</em> <code class="docutils literal notranslate"><span class="pre">0</span></code> (null).</p> |
| </div> |
| </section> |
| <section id="shredding-an-array"> |
| <h3>Shredding an Array<a class="headerlink" href="#shredding-an-array" title="Link to this heading">#</a></h3> |
| <p>For our next example, we will represent a shredded array of strings. Let’s consider a column that looks like:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span><span class="s2">"comedy"</span><span class="p">,</span> <span class="s2">"drama"</span><span class="p">],</span> <span class="p">[</span><span class="s2">"horror"</span><span class="p">,</span> <span class="n">null</span><span class="p">],</span> <span class="p">[</span><span class="s2">"comedy"</span><span class="p">,</span> <span class="s2">"drama"</span><span class="p">,</span> <span class="s2">"romance"</span><span class="p">],</span> <span class="n">null</span> |
| </pre></div> |
| </div> |
| <p>Representing this shredded variant in Parquet could look like:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">optional</span> <span class="n">group</span> <span class="n">tags</span> <span class="p">(</span><span class="n">VARIANT</span><span class="p">)</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">binary</span> <span class="n">metadata</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">group</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">LIST</span><span class="p">)</span> <span class="p">{</span> <span class="c1"># optional to allow null lists</span> |
| <span class="n">repeated</span> <span class="n">group</span> <span class="nb">list</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">element</span> <span class="p">{</span> <span class="c1"># shredded element</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">STRING</span><span class="p">);</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The array structure for Variant encoding does not allow missing elements, so all elements of the array must |
| be <em>non-nullable</em>. As such, either <strong>typed_value</strong> or <strong>value</strong> (<em>but not both!</em>) must be <em>non-null</em>.</p> |
| <p>The storage type to represent this in Arrow as a Variant extension type would be:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o"><</span> |
| <span class="n">metadata</span><span class="p">:</span> <span class="n">binary</span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">element</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">string</span> <span class="n">nullable</span> |
| <span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="o">></span> <span class="n">nullable</span> |
| <span class="o">></span> |
| </pre></div> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>As usual, <strong>Binary</strong> could also be <strong>LargeBinary</strong> or <strong>BinaryView</strong>, <strong>String</strong> could also be <strong>LargeString</strong> or <strong>StringView</strong>, |
| and <strong>List</strong> could also be <strong>LargeList</strong> or <strong>ListView</strong>.</p> |
| </div> |
| <p>The data would then be stored in Arrow as follows:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>* Length: 4, Null count: 1 |
| * Validity Bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|---------------| |
| | 00000111 | 0 (padding) | |
| |
| * Children arrays: |
| * field-0 array (`VarBinary` metadata) |
| * Length: 4, Null count: 0 |
| * Offsets buffer: |
| |
| | Bytes 0-19 | Bytes 20-63 | |
| |------------------|--------------------------| |
| | 0, 2, 4, 6, 8 | unspecified (padding) | |
| |
| * Value buffer: (01 00 -> indicates version 1 empty metadata) |
| |
| | Bytes 0-7 | Bytes 8-63 | |
| |-------------------------|--------------------------| |
| | 01 00 01 00 01 00 01 00 | unspecified (padding) | |
| |
| * field-1 array (`VarBinary` value) |
| * Length: 4, Null count: 3 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|---------------| |
| | 00001000 | 0 (padding) | |
| |
| * Offsets buffer: |
| |
| | Bytes 0-19 | Bytes 20-63 | |
| |------------------|--------------------------| |
| | 0, 0, 0, 0, 1 | unspecified (padding) | |
| |
| * Value buffer: (00 -> variant null) |
| |
| | Bytes 0 | Bytes 1-63 | |
| |--------------------|--------------------------| |
| | 00 | unspecified (padding) | |
| |
| * field-2 array (`List<Struct<VarBinary, String>>` typed_value) |
| * Length: 4, Null count: 1 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|-------------| |
| | 00000111 | 0 (padding) | |
| |
| * Offsets buffer (int32) |
| |
| | Bytes 0-19 | Bytes 20-63 | |
| |-------------------|-----------------------| |
| | 0, 2, 4, 7, 7 | unspecified (padding) | |
| |
| * Values array (`Struct<VarBinary, String>` element): |
| * Length: 7, Null count: 0 |
| * Validity bitmap buffer: Not required |
| |
| * Children arrays: |
| * field-0 array (`VarBinary` value) |
| * Length: 7, Null count: 6 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|-------------| |
| | 00001000 | 0 (padding) | |
| |
| * Offsets buffer (int32): |
| |
| | Bytes 0-31 | Bytes 32-63 | |
| |---------------------------|--------------------------| |
| | 0, 0, 0, 0, 1, 1, 1, 1 | unspecified (padding) | |
| |
| * Values buffer (`00` -> variant null): |
| |
| | Bytes 0 | Bytes 1-63 | |
| |--------------------|--------------------------| |
| | 00 | unspecified (padding) | |
| |
| * field-1 array (`String` typed_value) |
| * Length: 7, Null count: 1 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Bytes 1-63 | |
| |--------------------------|-------------| |
| | 01110111 | 0 (padding) | |
| |
| * Offsets buffer (int32): |
| |
| | Bytes 0-31 | Bytes 32-63 | |
| |---------------------------------|--------------------------| |
| | 0, 6, 11, 17, 17, 23, 28, 35 | unspecified (padding) | |
| |
| * Values buffer: |
| |
| | Bytes 0-34 | Bytes 35-63 | |
| |--------------------------------------|--------------------------| |
| | comedydramahorrorcomedydramaromance | unspecified (padding) | |
| </pre></div> |
| </div> |
| </section> |
| <section id="shredding-an-object"> |
| <h3>Shredding an Object<a class="headerlink" href="#shredding-an-object" title="Link to this heading">#</a></h3> |
| <p>Let’s consider a JSON column of “events” which contain a field named <code class="docutils literal notranslate"><span class="pre">event_type</span></code> (a string) |
| and a field named <code class="docutils literal notranslate"><span class="pre">event_ts</span></code> (a timestamp) that we wish to shred into separate columns. In Parquet, |
| it could look something like this:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">optional</span> <span class="n">group</span> <span class="n">event</span> <span class="p">(</span><span class="n">VARIANT</span><span class="p">)</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">binary</span> <span class="n">metadata</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> <span class="c1"># variant, remaining fields/values</span> |
| <span class="n">optional</span> <span class="n">group</span> <span class="n">typed_value</span> <span class="p">{</span> <span class="c1"># shredded fields for variant object</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">event_type</span> <span class="p">{</span> <span class="c1"># event_type shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">STRING</span><span class="p">);</span> |
| <span class="p">}</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">event_ts</span> <span class="p">{</span> <span class="c1"># event_ts shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">int64</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">TIMESTAMP</span><span class="p">(</span><span class="n">true</span><span class="p">,</span> <span class="n">MICROS</span><span class="p">));</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>We can then translate this into the expected extension storage type:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o"><</span> |
| <span class="n">metadata</span><span class="p">:</span> <span class="n">binary</span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">event_type</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">string</span> <span class="n">nullable</span> |
| <span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">event_ts</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">(</span><span class="n">us</span><span class="p">,</span> <span class="n">UTC</span><span class="p">)</span> <span class="n">nullable</span> |
| <span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span> |
| <span class="o">></span> <span class="n">nullable</span> |
| <span class="o">></span> |
| </pre></div> |
| </div> |
| <p>If a field <em>does not exist</em> in the variant object value, then both the <strong>value</strong> and <strong>typed_value</strong> columns for that row |
| will be null. If a field is <em>present</em>, but the value is null, then <strong>value</strong> must contain a Variant null.</p> |
| <p>It is <em>invalid</em> for both <strong>value</strong> and <strong>typed_value</strong> to be non-null for a given index. A reader can choose not to error |
| in this scenario, but if so it <strong>must</strong> use the value in the <strong>typed_value</strong> column for that index.</p> |
| <p>Let’s consider the following series of objects:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s2">"event_type"</span><span class="p">:</span> <span class="s2">"noop"</span><span class="p">,</span> <span class="s2">"event_ts"</span><span class="p">:</span> <span class="mi">1729794114937</span><span class="p">}</span> |
| |
| <span class="p">{</span><span class="s2">"event_type"</span><span class="p">:</span> <span class="s2">"login"</span><span class="p">,</span> <span class="s2">"event_ts"</span><span class="p">:</span> <span class="mi">1729794146402</span><span class="p">,</span> <span class="s2">"email"</span><span class="p">:</span> <span class="s2">"user@example.com"</span><span class="p">}</span> |
| |
| <span class="p">{</span><span class="s2">"error_msg"</span><span class="p">:</span> <span class="s2">"malformed..."</span><span class="p">}</span> |
| |
| <span class="s2">"malformed: not an object"</span> |
| |
| <span class="p">{</span><span class="s2">"event_ts"</span><span class="p">:</span> <span class="mi">1729794240241</span><span class="p">,</span> <span class="s2">"click"</span><span class="p">:</span> <span class="s2">"_button"</span><span class="p">}</span> |
| |
| <span class="p">{</span><span class="s2">"event_ts"</span><span class="p">:</span> <span class="n">null</span><span class="p">,</span> <span class="s2">"event_ts"</span><span class="p">:</span> <span class="mi">1729794954163</span><span class="p">}</span> |
| |
| <span class="p">{</span><span class="s2">"event_type"</span><span class="p">:</span> <span class="s2">"noop"</span><span class="p">,</span> <span class="s2">"event_ts"</span><span class="p">:</span> <span class="s2">"2024-10-24"</span><span class="p">}</span> |
| |
| <span class="p">{}</span> |
| |
| <span class="n">null</span> |
| |
| <span class="o">*</span><span class="n">Entirely</span> <span class="n">missing</span><span class="o">*</span> |
| </pre></div> |
| </div> |
| <p>To represent those values as a column of Variant values using the Variant extension type we get the following:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>* Length: 10, Null count: 1 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 11111111 | 00000001 | 0 (padding) | |
| |
| * Children arrays |
| * field-0 array (`VarBinary` Metadata) |
| * Length: 10, Null count: 0 |
| * Offsets buffer: |
| |
| | Bytes 0-43 (int32) | Bytes 44-63 | |
| |------------------------------------------|-------------------------| |
| | 0, 2, 11, 24, 26, 35, 37, 39, 41, 43, 45 | unspecified (padding) | |
| |
| * Value buffer: (01 00 -> version 1 empty metadata, |
| 01 01 00 XX ... -> Version 1, metadata with 1 elem, offset 0, offset XX == len(string), ... is dict string bytes) |
| |
| | Bytes 0-1 | Bytes 2-10 | Bytes 11-23 | Bytes 24-25 | Bytes 26-34 | |
| |-----------|-------------------|-----------------------|-------------|-------------------| |
| | 01 00 | 01 01 00 05 email | 01 01 00 09 error_msg | 01 00 | 01 01 00 05 click | |
| |
| | Bytes 35-36 | Bytes 37-38 | Bytes 39-40 | Bytes 41-42 | Bytes 43-44 | Bytes 45-63 | |
| |-------------|-------------|-------------|-------------|-------------|-----------------------| |
| | 01 00 | 01 00 | 01 00 | 01 00 | 01 00 | unspecified (padding) | |
| |
| * field-1 array (`VarBinary` Value) |
| * Length: 10, Null count: 5 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |---------------------------|-----------|-----------------------| |
| | 00011110 | 00000001 | 0 (padding) | |
| |
| * Offsets buffer (filled in based on lengths of encoded variants): |
| |
| | ... | |
| |
| * Value buffer: |
| |
| | VariantEncode({"email": "user@example.com"}) | VariantEncode({"error_msg": "malformed..."}) | |
| | VariantEncode("malformed: not an object") | VariantEncode({"click": "_button"}) | 00 (null) | |
| |
| * field-2 array (`Struct<...>` typed_value) |
| * Length: 10, Null count: 3 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 11110111 | 00000000 | 0 (padding) | |
| |
| * Children arrays: |
| * field-0 array (`Struct<VarBinary, String>` event_type) |
| * Length: 10, Null count: 0 |
| * Validity bitmap buffer: not required |
| |
| * Children arrays |
| * field-0 array (`VarBinary` value) |
| * Length: 10, Null count: 9 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 00100000 | 00000000 | 0 (padding) | |
| |
| * Offsets buffer (int32) |
| |
| | Bytes 0-43 (int32) | Bytes 44-63 | |
| |---------------------------------|-------------------------| |
| | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 | unspecified (padding) | |
| |
| * Value buffer: |
| |
| | Byte 0 | Bytes 1-63 | |
| |--------|------------------------| |
| | 00 | unspecified (padding) | |
| |
| * field-1 array (`String` typed_value) |
| * Length: 10, Null count: 7 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 01000011 | 00000000 | 0 (padding) | |
| |
| * Offsets buffer (int32) |
| |
| | Bytes 0-43 | Bytes 44-63 | |
| |-------------------------------------|------------------------| |
| | 0, 4, 9, 9, 9, 9, 9, 13, 13, 13, 13 | unspecified (padding) | |
| |
| * Value buffer: |
| |
| | Bytes 0-3 | Bytes 4-8 | Bytes 9-12 | Bytes 13-63 | |
| |-----------|-----------|------------|------------------------| |
| | noop | login | noop | unspecified (padding) | |
| |
| |
| * field-1 array (`Struct<VarBinary, Timestamp>` event_ts) |
| * Length: 10, Null count: 0 |
| * Validity bitmap buffer: not required |
| |
| * Children arrays |
| * field-0 array (`VarBinary` value) |
| * Length: 10, Null count: 9 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 01000000 | 00000000 | 0 (padding) | |
| |
| * Offsets buffer (int32) |
| |
| | Bytes 0-43 (int32) | Bytes 44-63 | |
| |---------------------------------|-------------------------| |
| | ... | unspecified (padding) | |
| |
| * Value buffer: |
| |
| | VariantEncode("2024-10-24") | |
| |
| * field-1 array (`Timestamp(us, UTC)` typed_value) |
| * Length: 10, Null count: 6 |
| * Validity bitmap buffer: |
| |
| | Byte 0 (validity bitmap) | Byte 1 | Bytes 2-63 | |
| |--------------------------|-----------|-----------------------| |
| | 00110011 | 00000000 | 0 (padding) | |
| |
| * Value buffer: |
| |
| | Bytes 0-7 | Bytes 8-15 | Bytes 16-31 | Bytes 32-39 | Bytes 40-47 | Bytes 48-63 | |
| |---------------|---------------|--------------|---------------|---------------|------------------------| |
| | 1729794114937 | 1729794146402 | unspecified | 1729794240241 | 1729794954163 | unspecified (padding) | |
| </pre></div> |
| </div> |
| </section> |
| <section id="putting-it-all-together"> |
| <h3>Putting it all together<a class="headerlink" href="#putting-it-all-together" title="Link to this heading">#</a></h3> |
| <p>As mentioned, the <strong>typed_value</strong> field associated with a Variant <strong>value</strong> can be of any shredded type. As a result, |
| as long as we follow the original rules we can have an arbitrary number of nested levels based on how you want to |
| shred the object. For example, we might have a few more fields alongside <strong>event_type</strong> to shred out. Possibly an object |
| that looks like this:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"event_type"</span><span class="p">:</span> <span class="s2">"login"</span><span class="p">,</span> |
| <span class="s2">"event_ts"</span><span class="p">:</span> <span class="mi">1729794114937</span><span class="p">,</span> |
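| <span class="s2">"location"</span><span class="p">:</span> <span class="p">{</span><span class="s2">"longitude"</span><span class="p">:</span> <span class="mf">1.5</span><span class="p">,</span> <span class="s2">"latitude"</span><span class="p">:</span> <span class="mf">5.5</span><span class="p">},</span> |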
| <span class="s2">"tags"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"foo"</span><span class="p">,</span> <span class="s2">"bar"</span><span class="p">,</span> <span class="s2">"baz"</span><span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>If we shred the extra fields out and represent it as Parquet it looks like:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">optional</span> <span class="n">group</span> <span class="n">event</span> <span class="p">(</span><span class="n">VARIANT</span><span class="p">)</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">binary</span> <span class="n">metadata</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> <span class="c1"># variant, remaining fields/values</span> |
| <span class="n">optional</span> <span class="n">group</span> <span class="n">typed_value</span> <span class="p">{</span> <span class="c1"># shredded fields for variant object</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">event_type</span> <span class="p">{</span> <span class="c1"># event_type shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">STRING</span><span class="p">);</span> |
| <span class="p">}</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">event_ts</span> <span class="p">{</span> <span class="c1"># event_ts shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">int64</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">TIMESTAMP</span><span class="p">(</span><span class="n">true</span><span class="p">,</span> <span class="n">MICROS</span><span class="p">));</span> |
| <span class="p">}</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">location</span> <span class="p">{</span> <span class="c1"># location shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">group</span> <span class="n">typed_value</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">longitude</span> <span class="p">{</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">double</span> <span class="n">typed_value</span><span class="p">;</span> |
| <span class="p">}</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">latitude</span> <span class="p">{</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">double</span> <span class="n">typed_value</span><span class="p">;</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">tags</span> <span class="p">{</span> <span class="c1"># tags shredded field</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">group</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">LIST</span><span class="p">)</span> <span class="p">{</span> |
| <span class="n">repeated</span> <span class="n">group</span> <span class="nb">list</span> <span class="p">{</span> |
| <span class="n">required</span> <span class="n">group</span> <span class="n">element</span> <span class="p">{</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">value</span><span class="p">;</span> |
| <span class="n">optional</span> <span class="n">binary</span> <span class="n">typed_value</span> <span class="p">(</span><span class="n">STRING</span><span class="p">);</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Finally, following the rules we set forth on constructing the Variant Extension Type storage type, we end up with:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o"><</span> |
| <span class="n">metadata</span><span class="p">:</span> <span class="n">binary</span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">event_type</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> <span class="n">typed_value</span><span class="p">:</span> <span class="n">string</span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">event_ts</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> <span class="n">typed_value</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">(</span><span class="n">us</span><span class="p">,</span> <span class="n">UTC</span><span class="p">)</span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">location</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">longitude</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> <span class="n">typed_value</span><span class="p">:</span> <span class="n">double</span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">latitude</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> <span class="n">typed_value</span><span class="p">:</span> <span class="n">double</span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span> |
| <span class="o">></span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="p">,</span> |
| <span class="n">tags</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span> |
| <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> |
| <span class="n">typed_value</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">struct</span><span class="o"><</span><span class="n">value</span><span class="p">:</span> <span class="n">binary</span> <span class="n">nullable</span><span class="p">,</span> <span class="n">typed_value</span><span class="p">:</span> <span class="n">string</span> <span class="n">nullable</span><span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span><span class="o">></span> <span class="n">nullable</span> |
| <span class="o">></span> <span class="n">non</span><span class="o">-</span><span class="n">nullable</span> |
| <span class="o">></span> <span class="n">nullable</span> |
| <span class="o">></span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="../CanonicalExtensions.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Canonical Extension Types</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="../Other.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Other Data Structures</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav id="pst-page-toc-nav" class="page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="pst-show_toc_level nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parquet-variant-extension">Parquet Variant Extension</a><ul class="pst-show_toc_level nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#unshredded">Unshredded</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#simple-shredding">Simple Shredding</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#shredding-an-array">Shredding an Array</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#shredding-an-object">Shredding an Object</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-it-all-together">Putting it all together</a></li> |
| </ul> |
| </li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/format/CanonicalExtensions/Examples.rst"> |
| <i class="fa-solid fa-pencil"></i> |
| |
| |
| |
| Edit on GitHub |
| |
| |
| </a> |
| </div> |
| </div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script defer src="../../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2"></script> |
| <script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2016-2026 Apache Software Foundation. |
| Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 9.1.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| <!-- # L10n: Setting the PST URL as an argument as this does not need to be localized --> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.17.0. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |