<!-- blob: 397cbcbd2535c687f01029160658c7f5962070af [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Integration Testing &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'format/Integration';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/format/Integration.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Glossary" href="Glossary.html" />
<link rel="prev" title="Changing the Apache Arrow Format Specification" href="Changing.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<script>document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="CanonicalExtensions.html">Canonical Extension Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="ADBC/C.html">ADBC C API Specification</a></li>
<li class="toctree-l2"><a class="reference internal" href="ADBC/Go.html">ADBC Go API Specification</a></li>
<li class="toctree-l2"><a class="reference internal" href="ADBC/Java.html">ADBC Java API Specification</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li>
<li class="breadcrumb-item active" aria-current="page">Integration Testing</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="integration-testing">
<span id="format-integration-testing"></span><h1>Integration Testing<a class="headerlink" href="#integration-testing" title="Permalink to this heading">#</a></h1>
<p>To ensure Arrow implementations are interoperable with each other,
the Arrow project includes cross-language integration tests which are
regularly run as Continuous Integration tasks.</p>
<p>The integration tests exercise compliance with several Arrow specifications:
the <a class="reference internal" href="Columnar.html#format-ipc"><span class="std std-ref">IPC format</span></a>, the <a class="reference internal" href="Flight.html#flight-rpc"><span class="std std-ref">Flight RPC</span></a> protocol,
and the <a class="reference internal" href="CDataInterface.html#c-data-interface"><span class="std std-ref">C Data Interface</span></a>.</p>
<section id="strategy">
<h2>Strategy<a class="headerlink" href="#strategy" title="Permalink to this heading">#</a></h2>
<p>Our strategy for integration testing between Arrow implementations is:</p>
<ul class="simple">
<li><p>Test datasets are specified in a custom human-readable,
<a class="reference internal" href="#format-json-integration"><span class="std std-ref">JSON-based format</span></a> designed exclusively
for Arrow’s integration tests.</p></li>
<li><p>The JSON files are generated by the integration test harness. Different
files are used to represent different data types and features, such as
numerics, lists, dictionary encoding, etc. This makes it easier to pinpoint
incompatibilities than if all data types were represented in a single file.</p></li>
<li><p>Each implementation provides entry points capable of converting
between the JSON and the Arrow in-memory representation, and of exposing
Arrow in-memory data using the desired format.</p></li>
<li><p>Each format (whether Arrow IPC, Flight or the C Data Interface) is tested for
all supported pairs of (producer, consumer) implementations. The producer
typically reads a JSON file, converts it to in-memory Arrow data, and exposes
this data using the format under test. The consumer reads the data in
that format and converts it back to Arrow in-memory data; it also reads
the same JSON file as the producer, and validates that both datasets are
identical.</p></li>
</ul>
<section id="example-ipc-format">
<h3>Example: IPC format<a class="headerlink" href="#example-ipc-format" title="Permalink to this heading">#</a></h3>
<p>Let’s say we are testing Arrow C++ as a producer and Arrow Java as a consumer
of the Arrow IPC format. Testing a JSON file would go as follows:</p>
<ol class="arabic simple">
<li><p>A C++ executable reads the JSON file, converts it into Arrow in-memory data
and writes an Arrow IPC file (the file paths are typically given on the command
line).</p></li>
<li><p>A Java executable reads the JSON file, converts it into Arrow in-memory data;
it also reads the Arrow IPC file generated by C++. Finally, it validates that
both Arrow in-memory datasets are equal.</p></li>
</ol>
</section>
<section id="example-c-data-interface">
<h3>Example: C Data Interface<a class="headerlink" href="#example-c-data-interface" title="Permalink to this heading">#</a></h3>
<p>Now, let’s say we are testing Arrow Go as a producer and Arrow C# as a consumer
of the Arrow C Data Interface.</p>
<ol class="arabic simple">
<li><p>The integration testing harness allocates a C
<a class="reference internal" href="CDataInterface.html#c-data-interface-struct-defs"><span class="std std-ref">ArrowArray</span></a> structure on the heap.</p></li>
<li><p>A Go in-process entrypoint (for example a C-compatible function call)
reads a JSON file and exports one of its <a class="reference internal" href="Glossary.html#term-record-batch"><span class="xref std std-term">record batches</span></a>
into the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code> structure.</p></li>
<li><p>A C# in-process entrypoint reads the same JSON file, converts the
same record batch into Arrow in-memory data; it also imports the
record batch exported by Arrow Go in the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code> structure.
It validates that both record batches are equal, and then releases the
imported record batch.</p></li>
<li><p>Depending on the implementation languages’ abilities, the integration
testing harness may assert that memory consumption remained identical
(i.e., that the exported record batch didn’t leak).</p></li>
<li><p>At the end, the integration testing harness deallocates the <code class="docutils literal notranslate"><span class="pre">ArrowArray</span></code>
structure.</p></li>
</ol>
</section>
</section>
<section id="running-integration-tests">
<span id="id1"></span><h2>Running integration tests<a class="headerlink" href="#running-integration-tests" title="Permalink to this heading">#</a></h2>
<p>The integration test data generator and runner are implemented inside
the <a class="reference internal" href="../developers/continuous_integration/archery.html#archery"><span class="std std-ref">Archery</span></a> utility. You need to install the <code class="docutils literal notranslate"><span class="pre">integration</span></code>
component of archery:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span><span class="s2">&quot;dev/archery[integration]&quot;</span>
</pre></div>
</div>
<p>The integration tests are run using the <code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">integration</span></code> command.</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>archery<span class="w"> </span>integration<span class="w"> </span>--help
</pre></div>
</div>
<p>In order to run integration tests, you’ll first need to build each component
you want to include. See the respective developer docs for C++, Java, etc.
for instructions on building those.</p>
<p>Some languages may require additional build options to enable integration
testing. For C++, for example, you need to add <code class="docutils literal notranslate"><span class="pre">-DARROW_BUILD_INTEGRATION=ON</span></code>
to your cmake command.</p>
<p>Depending on which components you have built, you can enable and add them to
the archery test run. For example, if you only have the C++ project built
and want to run the Arrow IPC integration tests, run:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery<span class="w"> </span>integration<span class="w"> </span>--run-ipc<span class="w"> </span>--with-cpp<span class="o">=</span><span class="m">1</span>
</pre></div>
</div>
<p>For Java, it may look like:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nv">VERSION</span><span class="o">=</span><span class="m">14</span>.0.0-SNAPSHOT
<span class="nb">export</span><span class="w"> </span><span class="nv">ARROW_JAVA_INTEGRATION_JAR</span><span class="o">=</span><span class="nv">$JAVA_DIR</span>/tools/target/arrow-tools-<span class="nv">$VERSION</span>-jar-with-dependencies.jar
archery<span class="w"> </span>integration<span class="w"> </span>--run-ipc<span class="w"> </span>--with-cpp<span class="o">=</span><span class="m">1</span><span class="w"> </span>--with-java<span class="o">=</span><span class="m">1</span>
</pre></div>
</div>
<p>To run all tests, including Flight and C Data Interface integration tests, do:</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery<span class="w"> </span>integration<span class="w"> </span>--with-all<span class="w"> </span>--run-flight<span class="w"> </span>--run-ipc<span class="w"> </span>--run-c-data
</pre></div>
</div>
<p>Note that we run these tests in continuous integration, and the CI job uses
docker-compose. You may also run the docker-compose job locally, or at least
refer to it if you have questions about how to build other languages or enable
certain tests.</p>
<p>See <a class="reference internal" href="../developers/continuous_integration/docker.html#docker-builds"><span class="std std-ref">Running Docker Builds</span></a> for more information about the project’s
<code class="docutils literal notranslate"><span class="pre">docker-compose</span></code> configuration.</p>
</section>
<section id="json-test-data-format">
<span id="format-json-integration"></span><h2>JSON test data format<a class="headerlink" href="#json-test-data-format" title="Permalink to this heading">#</a></h2>
<p>A JSON representation of Arrow columnar data is provided for
cross-language integration testing purposes.
This representation is <a class="reference external" href="https://lists.apache.org/thread.html/6947fb7666a0f9cc27d9677d2dad0fb5990f9063b7cf3d80af5e270f%40%3Cdev.arrow.apache.org%3E">not canonical</a>
but it provides a human-readable way of verifying language implementations.</p>
<p>See <a class="reference external" href="https://github.com/apache/arrow/tree/main/docs/source/format/integration_json_examples">here</a>
for some examples of this JSON data.</p>
<p>The high-level structure of a JSON integration test file is as follows:</p>
<p><strong>Data file</strong></p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;schema&quot;</span><span class="p">:</span> <span class="o">/*</span><span class="n">Schema</span><span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;batches&quot;</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span><span class="n">RecordBatch</span><span class="o">*/</span> <span class="p">],</span>
<span class="s2">&quot;dictionaries&quot;</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span><span class="n">DictionaryBatch</span><span class="o">*/</span> <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>All files contain <code class="docutils literal notranslate"><span class="pre">schema</span></code> and <code class="docutils literal notranslate"><span class="pre">batches</span></code>, while <code class="docutils literal notranslate"><span class="pre">dictionaries</span></code> is only
present if there are dictionary type fields in the schema.</p>
<p><strong>Schema</strong></p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;fields&quot;</span> <span class="p">:</span> <span class="p">[</span>
<span class="o">/*</span> <span class="n">Field</span> <span class="o">*/</span>
<span class="p">],</span>
<span class="s2">&quot;metadata&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Metadata</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p><strong>Field</strong></p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;name_of_the_field&quot;</span><span class="p">,</span>
<span class="s2">&quot;nullable&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Type</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;children&quot;</span> <span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">Field</span> <span class="o">*/</span> <span class="p">],</span>
<span class="s2">&quot;dictionary&quot;</span><span class="p">:</span> <span class="p">{</span>
<span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;indexType&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">Type</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;isOrdered&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span>
<span class="p">},</span>
<span class="s2">&quot;metadata&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">Metadata</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">dictionary</span></code> attribute is present if and only if the <code class="docutils literal notranslate"><span class="pre">Field</span></code> corresponds to a
dictionary type, and its <code class="docutils literal notranslate"><span class="pre">id</span></code> maps onto a column in the <code class="docutils literal notranslate"><span class="pre">DictionaryBatch</span></code>. In this
case the <code class="docutils literal notranslate"><span class="pre">type</span></code> attribute describes the value type of the dictionary.</p>
<p>For primitive types, <code class="docutils literal notranslate"><span class="pre">children</span></code> is an empty array.</p>
<p><strong>Metadata</strong></p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">null</span> <span class="o">|</span>
<span class="p">[</span> <span class="p">{</span>
<span class="s2">&quot;key&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">string</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;value&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">string</span> <span class="o">*/</span>
<span class="p">}</span> <span class="p">]</span>
</pre></div>
</div>
<p>A key-value mapping of custom metadata. It may be omitted or null, in which case it is
considered equivalent to <code class="docutils literal notranslate"><span class="pre">[]</span></code> (no metadata). Duplicated keys are not forbidden here.</p>
<p><strong>Type</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;null|struct|list|largelist|listview|largelistview|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|utf8view|binaryview|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map|runendencoded&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>A <code class="docutils literal notranslate"><span class="pre">Type</span></code> will have other fields as defined in
<a class="reference external" href="https://github.com/apache/arrow/tree/main/format/Schema.fbs">Schema.fbs</a>
depending on its name.</p>
<p>Int:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;int&quot;</span><span class="p">,</span>
<span class="s2">&quot;bitWidth&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;isSigned&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>FloatingPoint:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;floatingpoint&quot;</span><span class="p">,</span>
<span class="s2">&quot;precision&quot;</span> <span class="p">:</span> <span class="s2">&quot;HALF|SINGLE|DOUBLE&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>FixedSizeBinary:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;fixedsizebinary&quot;</span><span class="p">,</span>
<span class="s2">&quot;byteWidth&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">byte</span> <span class="n">width</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Decimal:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;decimal&quot;</span><span class="p">,</span>
<span class="s2">&quot;precision&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;scale&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Timestamp:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">,</span>
<span class="s2">&quot;unit&quot;</span> <span class="p">:</span> <span class="s2">&quot;$TIME_UNIT&quot;</span><span class="p">,</span>
<span class="s2">&quot;timezone&quot;</span><span class="p">:</span> <span class="s2">&quot;$timezone&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">$TIME_UNIT</span></code> is one of <code class="docutils literal notranslate"><span class="pre">&quot;SECOND|MILLISECOND|MICROSECOND|NANOSECOND&quot;</span></code></p>
<p>“timezone” is an optional string.</p>
<p>Duration:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;duration&quot;</span><span class="p">,</span>
<span class="s2">&quot;unit&quot;</span> <span class="p">:</span> <span class="s2">&quot;$TIME_UNIT&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Date:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;date&quot;</span><span class="p">,</span>
<span class="s2">&quot;unit&quot;</span> <span class="p">:</span> <span class="s2">&quot;DAY|MILLISECOND&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Time:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;time&quot;</span><span class="p">,</span>
<span class="s2">&quot;unit&quot;</span> <span class="p">:</span> <span class="s2">&quot;$TIME_UNIT&quot;</span><span class="p">,</span>
<span class="s2">&quot;bitWidth&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span><span class="p">:</span> <span class="mi">32</span> <span class="ow">or</span> <span class="mi">64</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Interval:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;interval&quot;</span><span class="p">,</span>
<span class="s2">&quot;unit&quot;</span> <span class="p">:</span> <span class="s2">&quot;YEAR_MONTH|DAY_TIME&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Union:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;union&quot;</span><span class="p">,</span>
<span class="s2">&quot;mode&quot;</span> <span class="p">:</span> <span class="s2">&quot;SPARSE|DENSE&quot;</span><span class="p">,</span>
<span class="s2">&quot;typeIds&quot;</span> <span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span> <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">typeIds</span></code> field in <code class="docutils literal notranslate"><span class="pre">Union</span></code> lists the codes used to denote which member of
the union is active in each array slot. Note that in general these discriminants are not identical
to the index of the corresponding child array.</p>
<p>List:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;list&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The type that the list is a “list of” will be included in the <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s
“children” member, as a single <code class="docutils literal notranslate"><span class="pre">Field</span></code> there. For example, for a list of
<code class="docutils literal notranslate"><span class="pre">int32</span></code>,</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;list_nullable&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;list&quot;</span>
<span class="p">},</span>
<span class="s2">&quot;nullable&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
<span class="s2">&quot;children&quot;</span><span class="p">:</span> <span class="p">[</span>
<span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;item&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;int&quot;</span><span class="p">,</span>
<span class="s2">&quot;isSigned&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
<span class="s2">&quot;bitWidth&quot;</span><span class="p">:</span> <span class="mi">32</span>
<span class="p">},</span>
<span class="s2">&quot;nullable&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
<span class="s2">&quot;children&quot;</span><span class="p">:</span> <span class="p">[]</span>
<span class="p">}</span>
<span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>FixedSizeList:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;fixedsizelist&quot;</span><span class="p">,</span>
<span class="s2">&quot;listSize&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>This type likewise comes with a length-1 “children” array.</p>
<p>Struct:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;struct&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” contains an array of <code class="docutils literal notranslate"><span class="pre">Fields</span></code> with meaningful
names and types.</p>
<p>Map:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;map&quot;</span><span class="p">,</span>
<span class="s2">&quot;keysSorted&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” contains a single <code class="docutils literal notranslate"><span class="pre">struct</span></code> field, which itself
contains 2 children, named “key” and “value”.</p>
<p>Null:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;null&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>RunEndEncoded:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;runendencoded&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” should contain exactly two child fields. The first
child must be named “run_ends”, be non-nullable and be either an <code class="docutils literal notranslate"><span class="pre">int16</span></code>,
<code class="docutils literal notranslate"><span class="pre">int32</span></code>, or <code class="docutils literal notranslate"><span class="pre">int64</span></code> type field. The second child must be named “values”,
but can be of any type.</p>
<p>Extension types are, as in the IPC format, represented as their underlying
storage type plus some dedicated field metadata to reconstruct the extension
type. For example, assuming a “uuid” extension type backed by a
FixedSizeBinary(16) storage, here is how a “uuid” field would be represented:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;name_of_the_field&quot;</span><span class="p">,</span>
<span class="s2">&quot;nullable&quot;</span> <span class="p">:</span> <span class="o">/*</span> <span class="n">boolean</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span> <span class="p">:</span> <span class="p">{</span>
<span class="s2">&quot;name&quot;</span> <span class="p">:</span> <span class="s2">&quot;fixedsizebinary&quot;</span><span class="p">,</span>
<span class="s2">&quot;byteWidth&quot;</span> <span class="p">:</span> <span class="mi">16</span>
<span class="p">},</span>
<span class="s2">&quot;children&quot;</span> <span class="p">:</span> <span class="p">[],</span>
<span class="s2">&quot;metadata&quot;</span> <span class="p">:</span> <span class="p">[</span>
<span class="p">{</span><span class="s2">&quot;key&quot;</span><span class="p">:</span> <span class="s2">&quot;ARROW:extension:name&quot;</span><span class="p">,</span> <span class="s2">&quot;value&quot;</span><span class="p">:</span> <span class="s2">&quot;uuid&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;key&quot;</span><span class="p">:</span> <span class="s2">&quot;ARROW:extension:metadata&quot;</span><span class="p">,</span> <span class="s2">&quot;value&quot;</span><span class="p">:</span> <span class="s2">&quot;uuid-serialized&quot;</span><span class="p">}</span>
<span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p><strong>RecordBatch</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;count&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="n">number</span> <span class="n">of</span> <span class="n">rows</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;columns&quot;</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">FieldData</span> <span class="o">*/</span> <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p><strong>DictionaryBatch</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">integer</span> <span class="o">*/</span><span class="p">,</span>
<span class="s2">&quot;data&quot;</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">RecordBatch</span> <span class="o">*/</span> <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p><strong>FieldData</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;field_name&quot;</span><span class="p">,</span>
<span class="s2">&quot;count&quot;</span><span class="p">:</span> <span class="s2">&quot;field_length&quot;</span><span class="p">,</span>
<span class="s2">&quot;$BUFFER_TYPE&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">BufferData</span> <span class="o">*/</span>
<span class="o">...</span>
<span class="s2">&quot;$BUFFER_TYPE&quot;</span><span class="p">:</span> <span class="o">/*</span> <span class="n">BufferData</span> <span class="o">*/</span>
<span class="s2">&quot;children&quot;</span><span class="p">:</span> <span class="p">[</span> <span class="o">/*</span> <span class="n">FieldData</span> <span class="o">*/</span> <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The “name” member of a <code class="docutils literal notranslate"><span class="pre">Field</span></code> in the <code class="docutils literal notranslate"><span class="pre">Schema</span></code> corresponds to the “name”
of a <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> contained in the “columns” of a <code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code>.
For nested types (list, struct, etc.), <code class="docutils literal notranslate"><span class="pre">Field</span></code>’s “children” each have a
“name” that corresponds to the “name” of a <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> inside the
“children” of that <code class="docutils literal notranslate"><span class="pre">FieldData</span></code>.
For <code class="docutils literal notranslate"><span class="pre">FieldData</span></code> inside of a <code class="docutils literal notranslate"><span class="pre">DictionaryBatch</span></code>, the “name” field does not
correspond to anything.</p>
<p>Here <code class="docutils literal notranslate"><span class="pre">$BUFFER_TYPE</span></code> is one of <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code>, <code class="docutils literal notranslate"><span class="pre">OFFSET</span></code> (for
variable-length types, such as strings and lists), <code class="docutils literal notranslate"><span class="pre">TYPE_ID</span></code> (for unions),
or <code class="docutils literal notranslate"><span class="pre">DATA</span></code>.</p>
<p><code class="docutils literal notranslate"><span class="pre">BufferData</span></code> is encoded based on the type of buffer:</p>
<ul>
<li><p><code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code>: a JSON array of 1 (valid) and 0 (null). Data for non-nullable
<code class="docutils literal notranslate"><span class="pre">Field</span></code> still has a <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> array, even though all values are 1.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>: a JSON array of integers for 32-bit offsets or
string-formatted integers for 64-bit offsets.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">TYPE_ID</span></code>: a JSON array of integers.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">DATA</span></code>: a JSON array of encoded values.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">VARIADIC_DATA_BUFFERS</span></code>: a JSON array of data buffers represented as
hex encoded strings.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">VIEWS</span></code>: a JSON array of encoded views, which are JSON objects with:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">SIZE</span></code>: an integer indicating the size of the view,</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">INLINED</span></code>: an encoded value (this field will be present if <code class="docutils literal notranslate"><span class="pre">SIZE</span></code>
is smaller than 12, otherwise the next three fields will be present),</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">PREFIX_HEX</span></code>: the first four bytes of the view encoded as hex,</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">BUFFER_INDEX</span></code>: the index in <code class="docutils literal notranslate"><span class="pre">VARIADIC_DATA_BUFFERS</span></code> of the buffer
viewed,</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>: the offset in the buffer viewed.</p></li>
</ul>
</li>
</ul>
<p>The value encoding for <code class="docutils literal notranslate"><span class="pre">DATA</span></code> is different depending on the logical
type:</p>
<ul class="simple">
<li><p>For boolean type: an array of 1 (true) and 0 (false).</p></li>
<li><p>For integer-based types (including timestamps): an array of JSON numbers.</p></li>
<li><p>For 64-bit integers: an array of integers formatted as JSON strings,
so as to avoid loss of precision.</p></li>
<li><p>For floating point types: an array of JSON numbers. Values are limited
to 3 decimal places to avoid loss of precision.</p></li>
<li><p>For binary types, an array of uppercase hex-encoded strings, so as
to represent arbitrary binary data.</p></li>
<li><p>For UTF-8 string types, an array of JSON strings.</p></li>
</ul>
<p>For “list” and “largelist” types, <code class="docutils literal notranslate"><span class="pre">BufferData</span></code> has <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> and
<code class="docutils literal notranslate"><span class="pre">OFFSET</span></code>, and the rest of the data is inside “children”. These child
<code class="docutils literal notranslate"><span class="pre">FieldData</span></code> contain all of the same attributes as non-child data, so in
the example of a list of <code class="docutils literal notranslate"><span class="pre">int32</span></code>, the child data has <code class="docutils literal notranslate"><span class="pre">VALIDITY</span></code> and
<code class="docutils literal notranslate"><span class="pre">DATA</span></code>.</p>
<p>For “fixedsizelist”, there is no <code class="docutils literal notranslate"><span class="pre">OFFSET</span></code> member because the offsets are
implied by the field’s “listSize”.</p>
<p>Note that the “count” for these child data may not match the parent “count”.
For example, if a <code class="docutils literal notranslate"><span class="pre">RecordBatch</span></code> has 7 rows and contains a <code class="docutils literal notranslate"><span class="pre">FixedSizeList</span></code>
of <code class="docutils literal notranslate"><span class="pre">listSize</span></code> 4, then the data inside the “children” of that <code class="docutils literal notranslate"><span class="pre">FieldData</span></code>
will have count 28.</p>
<p>For “null” type, <code class="docutils literal notranslate"><span class="pre">BufferData</span></code> does not contain any buffers.</p>
</section>
<section id="archery-integration-test-cases">
<h2>Archery Integration Test Cases<a class="headerlink" href="#archery-integration-test-cases" title="Permalink to this heading">#</a></h2>
<p>This list can make it easier to understand what manual testing may need to
be done for any future Arrow Format changes by knowing what cases the automated
integration testing actually tests.</p>
<p>There are two types of integration test cases: the ones populated on the fly
by the data generator in the Archery utility, and <em>gold</em> files that exist
in the <a class="reference external" href="https://github.com/apache/arrow-testing/tree/master/data/arrow-ipc-stream/integration">arrow-testing</a>
repository.</p>
<section id="data-generator-tests">
<h3>Data Generator Tests<a class="headerlink" href="#data-generator-tests" title="Permalink to this heading">#</a></h3>
<p>This is the high-level description of the cases which are generated and
tested using the <code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">integration</span></code> command (see <code class="docutils literal notranslate"><span class="pre">get_generated_json_files</span></code>
in <code class="docutils literal notranslate"><span class="pre">datagen.py</span></code>):</p>
<ul class="simple">
<li><p>Primitive Types</p>
<ul>
<li><p>No Batches</p></li>
<li><p>Various Primitive Values</p></li>
<li><p>Batches with Zero Length</p></li>
<li><p>String and Binary Large offset cases</p></li>
</ul>
</li>
<li><p>Null Type</p>
<ul>
<li><p>Trivial Null batches</p></li>
</ul>
</li>
<li><p>Decimal128</p></li>
<li><p>Decimal256</p></li>
<li><p>DateTime with various units</p></li>
<li><p>Durations with various units</p></li>
<li><p>Intervals</p>
<ul>
<li><p>MonthDayNano interval is a separate case</p></li>
</ul>
</li>
<li><p>Map Types</p>
<ul>
<li><p>Non-Canonical Maps</p></li>
</ul>
</li>
<li><p>Nested Types</p>
<ul>
<li><p>Lists</p></li>
<li><p>Structs</p></li>
<li><p>Lists with Large Offsets</p></li>
</ul>
</li>
<li><p>Unions</p></li>
<li><p>Custom Metadata</p></li>
<li><p>Schemas with Duplicate Field Names</p></li>
<li><p>Dictionary Types</p>
<ul>
<li><p>Signed indices</p></li>
<li><p>Unsigned indices</p></li>
<li><p>Nested dictionaries</p></li>
</ul>
</li>
<li><p>Run end encoded</p></li>
<li><p>Binary view and string view</p></li>
<li><p>List view and large list view</p></li>
<li><p>Extension Types</p></li>
</ul>
</section>
<section id="gold-file-integration-tests">
<h3>Gold File Integration Tests<a class="headerlink" href="#gold-file-integration-tests" title="Permalink to this heading">#</a></h3>
<p>Pre-generated JSON and Arrow IPC files (both file and stream format) exist
in the <a class="reference external" href="https://github.com/apache/arrow-testing">arrow-testing</a> repository
in the <code class="docutils literal notranslate"><span class="pre">data/arrow-ipc-stream/integration</span></code> directory. These serve as
<em>gold</em> files that are assumed to be correct for use in testing. They are
referenced by <code class="docutils literal notranslate"><span class="pre">runner.py</span></code> in the code for the <a class="reference internal" href="../developers/continuous_integration/archery.html#archery"><span class="std std-ref">Archery</span></a>
utility. Below are the test cases which are covered by them:</p>
<ul class="simple">
<li><p>Backwards Compatibility</p>
<ul>
<li><p>The following cases are tested using the 0.14.1 format:</p>
<ul>
<li><p>datetime</p></li>
<li><p>decimals</p></li>
<li><p>dictionaries</p></li>
<li><p>intervals</p></li>
<li><p>maps</p></li>
<li><p>nested types (list, struct)</p></li>
<li><p>primitives</p></li>
<li><p>primitive with no batches</p></li>
<li><p>primitive with zero length batches</p></li>
</ul>
</li>
<li><p>The following is tested for 0.17.1 format:</p>
<ul>
<li><p>unions</p></li>
</ul>
</li>
</ul>
</li>
<li><p>Endianness</p>
<ul>
<li><p>The following cases are tested with both Little Endian and Big Endian versions for auto conversion</p>
<ul>
<li><p>custom metadata</p></li>
<li><p>datetime</p></li>
<li><p>decimals</p></li>
<li><p>decimal256</p></li>
<li><p>dictionaries</p></li>
<li><p>dictionaries with unsigned indices</p></li>
<li><p>record batches with duplicate fieldnames</p></li>
<li><p>extension types</p></li>
<li><p>interval types</p></li>
<li><p>map types</p></li>
<li><p>non-canonical map data</p></li>
<li><p>nested types (lists, structs)</p></li>
<li><p>nested dictionaries</p></li>
<li><p>nested large offset types</p></li>
<li><p>nulls</p></li>
<li><p>primitive data</p></li>
<li><p>large offset binary and strings</p></li>
<li><p>primitives with no batches included</p></li>
<li><p>primitive batches with zero length</p></li>
<li><p>recursive nested types</p></li>
<li><p>union types</p></li>
</ul>
</li>
</ul>
</li>
<li><p>Compression tests</p>
<ul>
<li><p>LZ4</p></li>
<li><p>ZSTD</p></li>
</ul>
</li>
<li><p>Batches with Shared Dictionaries</p></li>
</ul>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="Changing.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Changing the Apache Arrow Format Specification</p>
</div>
</a>
<a class="right-next"
href="Glossary.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Glossary</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<!-- Secondary sidebar: in-page table of contents followed by the "Edit on GitHub" link. -->
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
  <div class="sidebar-secondary-item">
    <!-- This element is referenced by aria-labelledby on the <nav> below, so the
         decorative icon is hidden to keep the computed name to the visible text. -->
    <div
      id="pst-page-navigation-heading-2"
      class="page-toc tocsection onthispage">
      <i class="fa-solid fa-list" aria-hidden="true"></i> On this page
    </div>
    <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
      <ul class="visible nav section-nav flex-column">
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#strategy">Strategy</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#example-ipc-format">Example: IPC format</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#example-c-data-interface">Example: C Data Interface</a></li>
        </ul>
        </li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#running-integration-tests">Running integration tests</a></li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#json-test-data-format">JSON test data format</a></li>
        <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#archery-integration-test-cases">Archery Integration Test Cases</a><ul class="visible nav section-nav flex-column">
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-generator-tests">Data Generator Tests</a></li>
          <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gold-file-integration-tests">Gold File Integration Tests</a></li>
        </ul>
        </li>
      </ul>
    </nav></div>
  <div class="sidebar-secondary-item">
    <div class="tocsection editthispage">
      <a href="https://github.com/apache/arrow/edit/main/docs/source/format/Integration.rst">
        <!-- Decorative pencil icon; the link text carries the meaning. -->
        <i class="fa-solid fa-pencil" aria-hidden="true"></i>
        Edit on GitHub
      </a>
    </div>
  </div>
</div></div>
</div>
<!-- Content-area footer; it has no child elements on this page. -->
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Theme scripts, placed inside <body> after the main page content so that
     markup is parsed before they execute. Both files are also declared as
     preload hints in <head>. -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<!-- Site-wide footer: copyright and trademark notice, Sphinx version, theme version. -->
<footer class="bd-footer">
  <div class="bd-footer__inner bd-page-width">
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          © Copyright 2016-2024 Apache Software Foundation.
          Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
        </p>
      </div>
    </div>
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
        </p></div>
    </div>
  </div>
</footer>
</body>
</html>