| |
| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> |
| |
| <title>Integration Testing — Apache Arrow v17.0.0.dev52</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| /* Copy the "mode" and "theme" values stored in localStorage onto the root element's data-mode / data-theme attributes. */ |
| /* A missing or empty stored value falls back to "" for mode and "light" for theme. */ |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || "light"; |
| </script> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" /> |
| <link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" /> |
| |
| |
| <link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/copybutton.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" /> |
| |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" /> |
| <script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script> |
| |
| <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script> |
| <script src="../_static/doctools.js"></script> |
| <script src="../_static/sphinx_highlight.js"></script> |
| <script src="../_static/clipboard.min.js"></script> |
| <script src="../_static/copybutton.js"></script> |
| <script src="../_static/design-tabs.js"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'format/Integration';</script> |
| <script> |
| /* Attach theme-related settings to DOCUMENTATION_OPTIONS. The object is not created in this script; presumably documentation_options.js defines it - confirm. */ |
| DOCUMENTATION_OPTIONS.theme_version = '0.15.2'; |
| DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json'; |
| DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/'; |
| DOCUMENTATION_OPTIONS.show_version_warning_banner = true; |
| </script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/format/Integration.html" /> |
| <link rel="icon" href="../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Glossary" href="Glossary.html" /> |
| <link rel="prev" title="Changing the Apache Arrow Format Specification" href="Changing.html" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| |
| <!-- Matomo --> |
| <script> |
| /* Reuse an existing window._paq command queue, or create an empty one, and queue tracker commands on it. */ |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| /* The function wrapper keeps u, d, g and s local instead of adding them to the global scope. */ |
| (function() { |
| var u="https://analytics.apache.org/"; |
| /* Queue the tracker endpoint (u + 'matomo.php') and the site id '20'. */ |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| /* Create a script element for u + 'matomo.js', mark it async, and insert it before the first script element in the document. */ |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a> |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i> |
| Back to top |
| </button> |
| |
| |
| <input type="checkbox" |
| class="sidebar-toggle" |
| name="__primary" |
| id="__primary"/> |
| <label class="overlay overlay-primary" for="__primary"></label> |
| |
| <input type="checkbox" |
| class="sidebar-toggle" |
| name="__secondary" |
| id="__secondary"/> |
| <label class="overlay overlay-secondary" for="__secondary"></label> |
| |
| <div class="search-button__wrapper"> |
| <div class="search-button__overlay"></div> |
| <div class="search-button__search-container"> |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| id="search-input" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form></div> |
| </div> |
| |
| <header class="bd-header navbar navbar-expand-lg bd-navbar"> |
| <div class="bd-header__inner bd-page-width"> |
| <label class="sidebar-toggle primary-toggle" for="__primary"> |
| <span class="fa-solid fa-bars"></span> |
| </label> |
| |
| |
| <div class="col-lg-3 navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/> |
| <script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class="col-lg-9 navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav class="navbar-nav"> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links"> |
| Implementations |
| </button> |
| <ul id="pst-nav-more-links" class="dropdown-menu"> |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html"> |
| C/GLib |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../cpp/index.html"> |
| C++ |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md"> |
| C# |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17"> |
| Go |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../java/index.html"> |
| Java |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../js/index.html"> |
| JavaScript |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/"> |
| Julia |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md"> |
| MATLAB |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/"> |
| nanoarrow |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../python/index.html"> |
| Python |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../r/index.html"> |
| R |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md"> |
| Ruby |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/"> |
| Rust |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../status.html"> |
| Implementation Status |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/"> |
| C++ cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/"> |
| Java cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/"> |
| Python cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/"> |
| R cookbook |
| </a> |
| </li> |
| |
| </ul> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <script> |
| /* Write the search button into the page at this point; the markup only exists once this script has run. */ |
| document.write(` |
| <button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| <div class="navbar-item"> |
| <script> |
| /* Write the version switcher (a toggle button plus an initially empty menu container) into the page; the markup only exists once this script has run. */ |
| document.write(` |
| <div class="version-switcher__container dropdown"> |
| <button id="pst-version-switcher-button-2" |
| type="button" |
| class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-2" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-2" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-2"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| `); |
| </script></div> |
| |
| <div class="navbar-item"> |
| |
| <script> |
| /* Write the theme switch button into the page; it holds one icon span per data-mode value (light, dark, auto). */ |
| document.write(` |
| <button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span> |
| <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span> |
| <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span> |
| <span class="sr-only">X</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <script> |
| /* Write the search button for the navbar-persistent--mobile container; the markup only exists once this script has run. */ |
| document.write(` |
| <button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| |
| <label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0"> |
| <span class="fa-solid fa-outdent"></span> |
| </label> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <div class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav class="navbar-nav"> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2"> |
| Implementations |
| </button> |
| <ul id="pst-nav-more-links-2" class="dropdown-menu"> |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html"> |
| C/GLib |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../cpp/index.html"> |
| C++ |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md"> |
| C# |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17"> |
| Go |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../java/index.html"> |
| Java |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../js/index.html"> |
| JavaScript |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/"> |
| Julia |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md"> |
| MATLAB |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/"> |
| nanoarrow |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../python/index.html"> |
| Python |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../r/index.html"> |
| R |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md"> |
| Ruby |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/"> |
| Rust |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-internal" href="../status.html"> |
| Implementation Status |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/"> |
| C++ cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/"> |
| Java cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/"> |
| Python cookbook |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/"> |
| R cookbook |
| </a> |
| </li> |
| |
| </ul> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"> |
| <script> |
| /* Write the sidebar version switcher (a toggle button plus an initially empty menu container); its ids end in -3 and differ from the -2 ids used by the other switcher on this page. */ |
| document.write(` |
| <div class="version-switcher__container dropdown"> |
| <button id="pst-version-switcher-button-3" |
| type="button" |
| class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-3" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-3" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-3"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| `); |
| </script></div> |
| |
| <div class="navbar-item"> |
| |
| <script> |
| /* Write the sidebar theme switch button; it holds one icon span per data-mode value (light, dark, auto). */ |
| document.write(` |
| <button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span> |
| <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span> |
| <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span> |
| <span class="sr-only">X</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CanonicalExtensions.html">Canonical Extension Types</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul> |
| <li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul> |
| <li class="toctree-l2"><a class="reference internal" href="ADBC/C.html">ADBC C API Specification</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="ADBC/Go.html">ADBC Go API Specification</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="ADBC/Java.html">ADBC Java API Specification</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li> |
| <li class="toctree-l1 current active"><a class="current reference internal" href="#">Integration Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| </div> |
| |
| <div id="rtd-footer-container"></div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| |
| |
| <nav aria-label="Breadcrumb"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page">Integration Testing</li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <section id="integration-testing"> |
| <span id="format-integration-testing"></span><h1>Integration Testing<a class="headerlink" href="#integration-testing" title="Permalink to this heading">#</a></h1> |
| <p>To ensure Arrow implementations are interoperable with each other, |
| the Arrow project includes cross-language integration tests which are |
| regularly run as Continuous Integration tasks.</p> |
| <p>The integration tests exercise compliance with several Arrow specifications: |
| the <a class="reference internal" href="Columnar.html#format-ipc"><span class="std std-ref">IPC format</span></a>, the <a class="reference internal" href="Flight.html#flight-rpc"><span class="std std-ref">Flight RPC</span></a> protocol, |
| and the <a class="reference internal" href="CDataInterface.html#c-data-interface"><span class="std std-ref">C Data Interface</span></a>.</p> |
| <section id="strategy"> |
| <h2>Strategy<a class="headerlink" href="#strategy" title="Permalink to this heading">#</a></h2> |
| <p>Our strategy for integration testing between Arrow implementations is:</p> |
| <ul class="simple"> |
| <li><p>Test datasets are specified in a custom human-readable, |
| <a class="reference internal" href="#format-json-integration"><span class="std std-ref">JSON-based format</span></a> designed exclusively |
| for Arrow’s integration tests.</p></li> |
| <li><p>The JSON files are generated by the integration test harness. Different |
| files are used to represent different data types and features, such as |
| numerics, lists, dictionary encoding, etc. This makes it easier to pinpoint |
| incompatibilities than if all data types were represented in a single file.</p></li> |
| <li><p>Each implementation provides entry points capable of converting |
| between the JSON and the Arrow in-memory representation, and of exposing |
| Arrow in-memory data using the desired format.</p></li> |
| <li><p>Each format (whether Arrow IPC, Flight or the C Data Interface) is tested for |
| all supported pairs of (producer, consumer) implementations. The producer |
| typically reads a JSON file, converts it to in-memory Arrow data, and exposes |
| this data using the format under test. The consumer reads the data in the |
| said format and converts it back to Arrow in-memory data; it also reads |
| the same JSON file as the producer, and validates that both datasets are |
| identical.</p></li> |
| </ul> |
| <section id="example-ipc-format"> |
| <h3>Example: IPC format<a class="headerlink" href="#example-ipc-format" title="Permalink to this heading">#</a></h3> |
| <p>Let’s say we are testing Arrow C++ as a producer and Arrow Java as a consumer |
| of the Arrow IPC format. Testing a JSON file would go as follows:</p> |
| <ol class="arabic simple"> |
| <li><p>A C++ executable reads the JSON file, converts it into Arrow in-memory data |
| and writes an Arrow IPC file (the file paths are typically given on the command |
| line).</p></li> |
| <li><p>A Java executable reads the JSON file, converts it into Arrow in-memory data; |
| it also reads the Arrow IPC file generated by C++. Finally, it validates that |
| both Arrow in-memory datasets are equal.</p></li> |
| </ol> |
| </section> |
| <section id="example-c-data-interface"> |
| <h3>Example: C Data Interface<a class="headerlink" href="#example-c-data-interface" title="Permalink to this heading">#</a></h3> |
| <p>Now, let’s say we are testing Arrow Go as a producer and Arrow C# as a consumer |
| of the Arrow C Data Interface.</p> |
| <ol class="arabic simple"> |
| <li><p>The integration testing harness allocates a C |
| <a class="reference internal" href="CDataInterface.html#c-data-interface-struct-defs"><span class="std std-ref">ArrowArray</span></a> structure on the heap.</p></li> |
| <li><p>A Go in-process entrypoint (for example a C-compatible function call) |
| reads a JSON file and exports one of its <a class="reference internal" href="Glossary.html#term-record-batch"><span class="xref std std-term">record batches</span></a> |
| into the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code> structure.</p></li> |
| <li><p>A C# in-process entrypoint reads the same JSON file, converts the |
| same record batch into Arrow in-memory data; it also imports the |
| record batch exported by Arrow Go in the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code> structure. |
| It validates that both record batches are equal, and then releases the |
| imported record batch.</p></li> |
| <li><p>Depending on the implementation languages’ abilities, the integration |
| testing harness may assert that memory consumption remained identical |
| (i.e., that the exported record batch didn’t leak).</p></li> |
| <li><p>At the end, the integration testing harness deallocates the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code> |
| structure.</p></li> |
| </ol> |
| </section> |
| </section> |
| <section id="running-integration-tests"> |
| <span id="id1"></span><h2>Running integration tests<a class="headerlink" href="#running-integration-tests" title="Permalink to this heading">#</a></h2> |
| <p>The integration test data generator and runner are implemented inside |
| the <a class="reference internal" href="../developers/continuous_integration/archery.html#archery"><span class="std std-ref">Archery</span></a> utility. You need to install the <code class="docutils literal notranslate"><span class="pre">integration</span></code> |
| component of archery:</p> |
| <div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span><span class="s2">"dev/archery[integration]"</span> |
| </pre></div> |
| </div> |
| <p>The integration tests are run using the <code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">integration</span></code> command.</p> |
| <div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>integration<span class="w"> </span>--help |
| </pre></div> |
| </div> |
| <p>In order to run integration tests, you’ll first need to build each component |
| you want to include. See the respective developer docs for C++, Java, etc. |
| for instructions on building those.</p> |
| <p>Some languages may require additional build options to enable integration |
| testing. For C++, for example, you need to add <code class="docutils literal notranslate"><span class="pre">-DARROW_BUILD_INTEGRATION=ON</span></code> |
| to your cmake command.</p> |
| <p>Depending on which components you have built, you can enable and add them to |
| the archery test run. For example, if you only have the C++ project built |
| and want to run the Arrow IPC integration tests, run:</p> |
| <div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery<span class="w"> </span>integration<span class="w"> </span>--run-ipc<span class="w"> </span>--with-cpp<span class="o">=</span><span class="m">1</span> |
| </pre></div> |
| </div> |
| <p>For Java, it may look like:</p> |
| <div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nv">VERSION</span><span class="o">=</span><span class="m">14</span>.0.0-SNAPSHOT |
| <span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_JAVA_INTEGRATION_JAR</span><span class="o">=</span><span class="nv">$JAVA_DIR</span>/tools/target/arrow-tools-<span class="nv">$VERSION</span>-jar-with-dependencies.jar |
| archery<span class="w"> </span>integration<span class="w"> </span>--run-ipc<span class="w"> </span>--with-cpp<span class="o">=</span><span class="m">1</span><span class="w"> </span>--with-java<span class="o">=</span><span class="m">1</span> |
| </pre></div> |
| </div> |
| <p>To run all tests, including Flight and C Data Interface integration tests, do:</p> |
| <div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery<span class="w"> </span>integration<span class="w"> </span>--with-all<span class="w"> </span>--run-flight<span class="w"> </span>--run-ipc<span class="w"> </span>--run-c-data |
| </pre></div> |
| </div> |
| <p>Note that we run these tests in continuous integration, and the CI job uses |
| docker-compose. You may also run the docker-compose job locally, or at least |
| refer to it if you have questions about how to build other languages or enable |
| certain tests.</p> |
| <p>See <a class="reference internal" href="../developers/continuous_integration/docker.html#docker-builds"><span class="std std-ref">Running Docker Builds</span></a> for more information about the project’s |
| <code class="docutils literal notranslate"><span class="pre">docker-compose</span></code> configuration.</p> |
| </section> |
| <section id="json-test-data-format"> |
| <span id="format-json-integration"></span><h2>JSON test data format<a class="headerlink" href="#json-test-data-format" title="Permalink to this heading">#</a></h2> |
| <p>A JSON representation of Arrow columnar data is provided for |
| cross-language integration testing purposes. |
| This representation is <a class="reference external" href="https://lists.apache.org/thread.html/6947fb7666a0f9cc27d9677d2dad0fb5990f9063b7cf3d80af5e270f%40%3Cdev.arrow.apache.org%3E">not canonical</a> |
| but it provides a human-readable way of verifying language implementations.</p> |
| <p>See <a class="reference external" href="https://github.com/apache/arrow/tree/main/docs/source/format/integration_json_examples">here</a> |
| for some examples of this JSON data.</p> |
| <p>The high-level structure of a JSON integration test file is as follows:</p> |
| <p><strong>Data file</strong></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"schema"</span><span class="p">:</span> <span class="o">/*</span><span class="n">Schema</span><span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"batches"</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span><span class="n">RecordBatch</span><span class="o">*/</span> <span class="p">],</span> |
| <span class="s2">"dictionaries"</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span><span class="n">DictionaryBatch</span><span class="o">*/</span> <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>All files contain <code class="docutils literal notranslate"><span class="pre">schema</span></code> and <code class="docutils literal notranslate"><span class="pre">batches</span></code>, while <code class="docutils literal notranslate"><span class="pre">dictionaries</span></code> is only |
| present if there are dictionary type fields in the schema.</p> |
| <p><strong>Schema</strong></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"fields"</span> <span class="p">:</span> <span class="p">[</span> |
| <span class="o">/*</span> <span class="n">Field</span> <span class="o">*/</span> |
| <span class="p">],</span> |
| <span class="s2">"metadata"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Metadata</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p><strong>Field</strong></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"name_of_the_field"</span><span class="p">,</span> |
| <span class="s2">"nullable"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"type"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Type</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"children"</span> <span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">Field</span> <span class="o">*/</span> <span class="p">],</span> |
| <span class="s2">"dictionary"</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s2">"id"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"indexType"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">Type</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"isOrdered"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span> |
| <span class="p">},</span> |
| <span class="s2">"metadata"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Metadata</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The <code class="docutils literal notranslate"><span class="pre">dictionary</span></code> attribute is present if and only if the <code class="docutils literal notranslate"><span class="pre">Field</span></code> corresponds to a |
| dictionary type, and its <code class="docutils literal notranslate"><span class="pre">id</span></code> maps onto a column in the <code class="docutils literal notranslate"><span class="pre">DictionaryBatch</span></code>. In this |
| case the <code class="docutils literal notranslate"><span class="pre">type</span></code> attribute describes the value type of the dictionary.</p> |
| <p>For primitive types, <code class="docutils literal notranslate"><span class="pre">children</span></code> is an empty array.</p> |
| <p><strong>Metadata</strong></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">null</span> <span class="o">|</span> |
| <span class="p">[</span> <span class="p">{</span> |
| <span class="s2">"key"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">string</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"value"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">string</span> <span class="o">*/</span> |
| <span class="p">}</span> <span class="p">]</span> |
| </pre></div> |
| </div> |
| <p>A key-value mapping of custom metadata. It may be omitted or null, in which case it is |
| considered equivalent to <code class="docutils literal notranslate"><span class="pre">[]</span></code> (no metadata). Duplicated keys are not forbidden here.</p> |
| <p><strong>Type</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"null|struct|list|largelist|listview|largelistview|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|utf8view|binaryview|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map|runendencoded"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>A <code class="docutils literal notranslate"><span class="pre">Type</span></code> will have other fields as defined in |
| <a class="reference external" href="https://github.com/apache/arrow/tree/main/format/Schema.fbs">Schema.fbs</a> |
| depending on its name.</p> |
| <p>Int:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"int"</span><span class="p">,</span> |
| <span class="s2">"bitWidth"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"isSigned"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>FloatingPoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"floatingpoint"</span><span class="p">,</span> |
| <span class="s2">"precision"</span> <span class="p">:</span> <span class="s2">"HALF|SINGLE|DOUBLE"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>FixedSizeBinary:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"fixedsizebinary"</span><span class="p">,</span> |
| <span class="s2">"byteWidth"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">byte</span> <span class="n">width</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Decimal:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"decimal"</span><span class="p">,</span> |
| <span class="s2">"precision"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"scale"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Timestamp:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"timestamp"</span><span class="p">,</span> |
| <span class="s2">"unit"</span> <span class="p">:</span> <span class="s2">"$TIME_UNIT"</span><span class="p">,</span> |
| <span class="s2">"timezone"</span><span class="p">:</span> <span class="s2">"$timezone"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">$TIME_UNIT</span></code> is one of <code class="docutils literal notranslate"><span class="pre">"SECOND|MILLISECOND|MICROSECOND|NANOSECOND"</span></code></p> |
| <p>“timezone” is an optional string.</p> |
| <p>Duration:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"duration"</span><span class="p">,</span> |
| <span class="s2">"unit"</span> <span class="p">:</span> <span class="s2">"$TIME_UNIT"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Date:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"date"</span><span class="p">,</span> |
| <span class="s2">"unit"</span> <span class="p">:</span> <span class="s2">"DAY|MILLISECOND"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Time:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"time"</span><span class="p">,</span> |
| <span class="s2">"unit"</span> <span class="p">:</span> <span class="s2">"$TIME_UNIT"</span><span class="p">,</span> |
| <span class="s2">"bitWidth"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span><span class="p">:</span> <span class="mi">32</span> <span class="ow">or</span> <span class="mi">64</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Interval:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"interval"</span><span class="p">,</span> |
| <span class="s2">"unit"</span> <span class="p">:</span> <span class="s2">"YEAR_MONTH|DAY_TIME"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Union:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"union"</span><span class="p">,</span> |
| <span class="s2">"mode"</span> <span class="p">:</span> <span class="s2">"SPARSE|DENSE"</span><span class="p">,</span> |
| <span class="s2">"typeIds"</span> <span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span> <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The <code class="docutils literal notranslate"><span class="pre">typeIds</span></code> field in <code class="docutils literal notranslate"><span class="pre">Union</span></code> holds the codes used to denote which member of |
| the union is active in each array slot. Note that in general these discriminants are not identical |
| to the index of the corresponding child array.</p> |
| <p>List:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"list"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The type that the list is a “list of” will be included in the <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s |
| “children” member, as a single <code class="docutils literal notranslate"><span class="pre">Field</span></code> there. For example, for a list of |
| <code class="docutils literal notranslate"><span class="pre">int32</span></code>,</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"list_nullable"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"list"</span> |
| <span class="p">},</span> |
| <span class="s2">"nullable"</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span> |
| <span class="s2">"children"</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"item"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"int"</span><span class="p">,</span> |
| <span class="s2">"isSigned"</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span> |
| <span class="s2">"bitWidth"</span><span class="p">:</span> <span class="mi">32</span> |
| <span class="p">},</span> |
| <span class="s2">"nullable"</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span> |
| <span class="s2">"children"</span><span class="p">:</span> <span class="p">[]</span> |
| <span class="p">}</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>FixedSizeList:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"fixedsizelist"</span><span class="p">,</span> |
| <span class="s2">"listSize"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>This type likewise comes with a length-1 “children” array.</p> |
| <p>Struct:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"struct"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” contains an array of <code class="docutils literal notranslate"><span class="pre">Fields</span></code> with meaningful |
| names and types.</p> |
| <p>Map:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"map"</span><span class="p">,</span> |
| <span class="s2">"keysSorted"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” contains a single <code class="docutils literal notranslate"><span class="pre">struct</span></code> field, which itself |
| contains 2 children, named “key” and “value”.</p> |
| <p>Null:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"null"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>RunEndEncoded:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"runendencoded"</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” should contain exactly two child fields. The first |
| child must be named “run_ends”, be non-nullable and be either an <code class="docutils literal notranslate"><span class="pre">int16</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">int32</span></code>, or <code class="docutils literal notranslate"><span class="pre">int64</span></code> type field. The second child must be named “values”, |
| but can be of any type.</p> |
| <p>Extension types are, as in the IPC format, represented as their underlying |
| storage type plus some dedicated field metadata to reconstruct the extension |
| type. For example, assuming a “uuid” extension type backed by a |
| FixedSizeBinary(16) storage, here is how a “uuid” field would be represented:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"name_of_the_field"</span><span class="p">,</span> |
| <span class="s2">"nullable"</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"type"</span> <span class="p">:</span> <span class="p">{</span> |
| <span class="s2">"name"</span> <span class="p">:</span> <span class="s2">"fixedsizebinary"</span><span class="p">,</span> |
| <span class="s2">"byteWidth"</span> <span class="p">:</span> <span class="mi">16</span> |
| <span class="p">},</span> |
| <span class="s2">"children"</span> <span class="p">:</span> <span class="p">[],</span> |
| <span class="s2">"metadata"</span> <span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="s2">"key"</span><span class="p">:</span> <span class="s2">"ARROW:extension:name"</span><span class="p">,</span> <span class="s2">"value"</span><span class="p">:</span> <span class="s2">"uuid"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"key"</span><span class="p">:</span> <span class="s2">"ARROW:extension:metadata"</span><span class="p">,</span> <span class="s2">"value"</span><span class="p">:</span> <span class="s2">"uuid-serialized"</span><span class="p">}</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p><strong>RecordBatch</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"count"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="n">number</span> <span class="n">of</span> <span class="n">rows</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"columns"</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">FieldData</span> <span class="o">*/</span> <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p><strong>DictionaryBatch</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"id"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span> |
| <span class="s2">"data"</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">RecordBatch</span> <span class="o">*/</span> <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p><strong>FieldData</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"field_name"</span><span class="p">,</span> |
| <span class="s2">"count"</span><span class="p">:</span> <span class="s2">"field_length"</span><span class="p">,</span> |
| <span class="s2">"$BUFFER_TYPE"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">BufferData</span> <span class="o">*/</span> |
| <span class="o">...</span> |
| <span class="s2">"$BUFFER_TYPE"</span><span class="p">:</span> <span class="o">/*</span> <span class="n">BufferData</span> <span class="o">*/</span> |
| <span class="s2">"children"</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">FieldData</span> <span class="o">*/</span> <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>The “name” member of a <code class="docutils literal notranslate"><span class="pre">Field</span></code> in the <code class="docutils literal notranslate"><span class="pre">Schema</span></code> corresponds to the “name” |
| of a <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> contained in the “columns” of a <code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code>. |
| For nested types (list, struct, etc.), <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” each have a |
| “name” that corresponds to the “name” of a <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> inside the |
| “children” of that <code class="docutils literal notranslate"><span class="pre">FieldData</span></code>. |
| For <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> inside of a <code class="docutils literal notranslate"><span class="pre">DictionaryBatch</span></code>, the “name” field does not |
| correspond to anything.</p> |
| <p>Here <code class="docutils literal notranslate"><span class="pre">$BUFFER_TYPE</span></code> is one of <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code>, <code class="docutils literal notranslate"><span class="pre">OFFSET</span></code> (for |
| variable-length types, such as strings and lists), <code class="docutils literal notranslate"><span class="pre">TYPE_ID</span></code> (for unions), |
| or <code class="docutils literal notranslate"><span class="pre">DATA</span></code>.</p> |
| <p><code class="docutils literal notranslate"><span class="pre">BufferData</span></code> is encoded based on the type of buffer:</p> |
| <ul> |
| <li><p><code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code>: a JSON array of 1 (valid) and 0 (null). Data for non-nullable |
| <code class="docutils literal notranslate"><span class="pre">Field</span></code> still has a <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> array, even though all values are 1.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>: a JSON array of integers for 32-bit offsets or |
| string-formatted integers for 64-bit offsets.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">TYPE_ID</span></code>: a JSON array of integers.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">DATA</span></code>: a JSON array of encoded values.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">VARIADIC_DATA_BUFFERS</span></code>: a JSON array of data buffers represented as |
| hex encoded strings.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">VIEWS</span></code>: a JSON array of encoded views, which are JSON objects with:</p> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">SIZE</span></code>: an integer indicating the size of the view,</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">INLINED</span></code>: an encoded value (this field will be present if <code class="docutils literal notranslate"><span class="pre">SIZE</span></code> |
| is smaller than 12, otherwise the next three fields will be present),</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">PREFIX_HEX</span></code>: the first four bytes of the view encoded as hex,</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">BUFFER_INDEX</span></code>: the index in <code class="docutils literal notranslate"><span class="pre">VARIADIC_DATA_BUFFERS</span></code> of the buffer |
| viewed,</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>: the offset in the buffer viewed.</p></li> |
| </ul> |
| </li> |
| </ul> |
| <p>The value encoding for <code class="docutils literal notranslate"><span class="pre">DATA</span></code> is different depending on the logical |
| type:</p> |
| <ul class="simple"> |
| <li><p>For boolean type: an array of 1 (true) and 0 (false).</p></li> |
| <li><p>For integer-based types (including timestamps): an array of JSON numbers.</p></li> |
| <li><p>For 64-bit integers: an array of integers formatted as JSON strings, |
| so as to avoid loss of precision.</p></li> |
| <li><p>For floating point types: an array of JSON numbers. Values are limited |
| to 3 decimal places to avoid loss of precision.</p></li> |
| <li><p>For binary types, an array of uppercase hex-encoded strings, so as |
| to represent arbitrary binary data.</p></li> |
| <li><p>For UTF-8 string types, an array of JSON strings.</p></li> |
| </ul> |
| <p>For “list” and “largelist” types, <code class="docutils literal notranslate"><span class="pre">BufferData</span></code> has <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> and |
| <code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>, and the rest of the data is inside “children”. These child |
| <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> contain all of the same attributes as non-child data, so in |
| the example of a list of <code class="docutils literal notranslate"><span class="pre">int32</span></code>, the child data has <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> and |
| <code class="docutils literal notranslate"><span class="pre">DATA</span></code>.</p> |
| <p>For “fixedsizelist”, there is no <code class="docutils literal notranslate"><span class="pre">OFFSET</span></code> member because the offsets are |
| implied by the field’s “listSize”.</p> |
| <p>Note that the “count” for these child data may not match the parent “count”. |
| For example, if a <code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code> has 7 rows and contains a <code class="docutils literal notranslate"><span class="pre">FixedSizeList</span></code> |
| of <code class="docutils literal notranslate"><span class="pre">listSize</span></code> 4, then the data inside the “children” of that <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> |
| will have count 28.</p> |
| <p>For “null” type, <code class="docutils literal notranslate"><span class="pre">BufferData</span></code> does not contain any buffers.</p> |
| </section> |
| <section id="archery-integration-test-cases"> |
| <h2>Archery Integration Test Cases<a class="headerlink" href="#archery-integration-test-cases" title="Permalink to this heading">#</a></h2> |
| <p>This list can make it easier to understand what manual testing may need to |
| be done for any future Arrow Format changes by knowing what cases the automated |
| integration testing actually tests.</p> |
| <p>There are two types of integration test cases: the ones populated on the fly |
| by the data generator in the Archery utility, and <em>gold</em> files that exist |
| in the <a class="reference external" href="https://github.com/apache/arrow-testing/tree/master/data/arrow-ipc-stream/integration">arrow-testing</a> |
| repository.</p> |
| <section id="data-generator-tests"> |
| <h3>Data Generator Tests<a class="headerlink" href="#data-generator-tests" title="Permalink to this heading">#</a></h3> |
| <p>This is the high-level description of the cases which are generated and |
| tested using the <code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">integration</span></code> command (see <code class="docutils literal notranslate"><span class="pre">get_generated_json_files</span></code> |
| in <code class="docutils literal notranslate"><span class="pre">datagen.py</span></code>):</p> |
| <ul class="simple"> |
| <li><p>Primitive Types</p> |
| <ul> |
| <li><p>No Batches</p></li> |
| <li><p>Various Primitive Values</p></li> |
| <li><p>Batches with Zero Length</p></li> |
| <li><p>String and Binary Large offset cases</p></li> |
| </ul> |
| </li> |
| <li><p>Null Type</p> |
| <ul> |
| <li><p>Trivial Null batches</p></li> |
| </ul> |
| </li> |
| <li><p>Decimal128</p></li> |
| <li><p>Decimal256</p></li> |
| <li><p>DateTime with various units</p></li> |
| <li><p>Durations with various units</p></li> |
| <li><p>Intervals</p> |
| <ul> |
| <li><p>MonthDayNano interval is a separate case</p></li> |
| </ul> |
| </li> |
| <li><p>Map Types</p> |
| <ul> |
| <li><p>Non-Canonical Maps</p></li> |
| </ul> |
| </li> |
| <li><p>Nested Types</p> |
| <ul> |
| <li><p>Lists</p></li> |
| <li><p>Structs</p></li> |
| <li><p>Lists with Large Offsets</p></li> |
| </ul> |
| </li> |
| <li><p>Unions</p></li> |
| <li><p>Custom Metadata</p></li> |
| <li><p>Schemas with Duplicate Field Names</p></li> |
| <li><p>Dictionary Types</p> |
| <ul> |
| <li><p>Signed indices</p></li> |
| <li><p>Unsigned indices</p></li> |
| <li><p>Nested dictionaries</p></li> |
| </ul> |
| </li> |
| <li><p>Run end encoded</p></li> |
| <li><p>Binary view and string view</p></li> |
| <li><p>List view and large list view</p></li> |
| <li><p>Extension Types</p></li> |
| </ul> |
| </section> |
| <section id="gold-file-integration-tests"> |
| <h3>Gold File Integration Tests<a class="headerlink" href="#gold-file-integration-tests" title="Permalink to this heading">#</a></h3> |
| <p>Pre-generated JSON and Arrow IPC files (both file and stream format) exist |
| in the <a class="reference external" href="https://github.com/apache/arrow-testing">arrow-testing</a> repository |
| in the <code class="docutils literal notranslate"><span class="pre">data/arrow-ipc-stream/integration</span></code> directory. These serve as |
| <em>gold</em> files that are assumed to be correct for use in testing. They are |
| referenced by <code class="docutils literal notranslate"><span class="pre">runner.py</span></code> in the code for the <a class="reference internal" href="../developers/continuous_integration/archery.html#archery"><span class="std std-ref">Archery</span></a> |
| utility. Below are the test cases which are covered by them:</p> |
| <ul class="simple"> |
| <li><p>Backwards Compatibility</p> |
| <ul> |
| <li><p>The following cases are tested using the 0.14.1 format:</p> |
| <ul> |
| <li><p>datetime</p></li> |
| <li><p>decimals</p></li> |
| <li><p>dictionaries</p></li> |
| <li><p>intervals</p></li> |
| <li><p>maps</p></li> |
| <li><p>nested types (list, struct)</p></li> |
| <li><p>primitives</p></li> |
| <li><p>primitive with no batches</p></li> |
| <li><p>primitive with zero length batches</p></li> |
| </ul> |
| </li> |
| <li><p>The following case is tested using the 0.17.1 format:</p> |
| <ul> |
| <li><p>unions</p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><p>Endianness</p> |
| <ul> |
| <li><p>The following cases are tested with both Little Endian and Big Endian versions for auto conversion:</p> |
| <ul> |
| <li><p>custom metadata</p></li> |
| <li><p>datetime</p></li> |
| <li><p>decimals</p></li> |
| <li><p>decimal256</p></li> |
| <li><p>dictionaries</p></li> |
| <li><p>dictionaries with unsigned indices</p></li> |
| <li><p>record batches with duplicate field names</p></li> |
| <li><p>extension types</p></li> |
| <li><p>interval types</p></li> |
| <li><p>map types</p></li> |
| <li><p>non-canonical map data</p></li> |
| <li><p>nested types (lists, structs)</p></li> |
| <li><p>nested dictionaries</p></li> |
| <li><p>nested large offset types</p></li> |
| <li><p>nulls</p></li> |
| <li><p>primitive data</p></li> |
| <li><p>large offset binary and strings</p></li> |
| <li><p>primitives with no batches included</p></li> |
| <li><p>primitive batches with zero length</p></li> |
| <li><p>recursive nested types</p></li> |
| <li><p>union types</p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><p>Compression tests</p> |
| <ul> |
| <li><p>LZ4</p></li> |
| <li><p>ZSTD</p></li> |
| </ul> |
| </li> |
| <li><p>Batches with Shared Dictionaries</p></li> |
| </ul> |
| </section> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="Changing.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Changing the Apache Arrow Format Specification</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="Glossary.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Glossary</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#strategy">Strategy</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#example-ipc-format">Example: IPC format</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#example-c-data-interface">Example: C Data Interface</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#running-integration-tests">Running integration tests</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#json-test-data-format">JSON test data format</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#archery-integration-test-cases">Archery Integration Test Cases</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-generator-tests">Data Generator Tests</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gold-file-integration-tests">Gold File Integration Tests</a></li> |
| </ul> |
| </li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/format/Integration.rst"> |
| <i class="fa-solid fa-pencil"></i> |
| |
| |
| |
| Edit on GitHub |
| |
| |
| </a> |
| </div> |
| </div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script> |
| <script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2016-2024 Apache Software Foundation. |
| Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |