| |
| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> |
| |
| <title>Statistics schema — Apache Arrow v21.0.0</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; |
| </script> |
| <!-- |
| This gives us a CSS class that is hidden only when JavaScript is disabled. |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/copybutton.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/sphinx-design.4cbf315f70debaebd550c87a6162cf0f.min.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" /> |
| |
| <!-- So that users can add custom icons --> |
| <script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" /> |
| |
| <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script> |
| <script src="../_static/doctools.js"></script> |
| <script src="../_static/sphinx_highlight.js"></script> |
| <script src="../_static/clipboard.min.js"></script> |
| <script src="../_static/copybutton.js"></script> |
| <script src="../_static/design-tabs.js"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'format/StatisticsSchema';</script> |
| <script> |
| DOCUMENTATION_OPTIONS.theme_version = '0.16.1'; |
| DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json'; |
| DOCUMENTATION_OPTIONS.theme_switcher_version_match = ''; |
| DOCUMENTATION_OPTIONS.show_version_warning_banner = |
| false; |
| </script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/format/StatisticsSchema.html" /> |
| <link rel="icon" href="../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Dissociated IPC Protocol" href="DissociatedIPC.html" /> |
| <link rel="prev" title="The Arrow C Device data interface" href="CDeviceDataInterface.html" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="21.0.0" /> |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class=" navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v21.0.0 - Home"/> |
| <img src="../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v21.0.0 - Home"/> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class=" navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-2" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-2" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-2" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-2"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| Specifications |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../developers/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../implementations.html"> |
| Implementations |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><div class="kapa-ai-bot"> |
| <script |
| async |
| src="https://widget.kapa.ai/kapa-widget.bundle.js" |
| data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2" |
| data-project-name="Apache Arrow" |
| data-project-color="#000000" |
| data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png" |
| data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc." |
| data-consent-required="true" |
| data-user-analytics-cookie-enabled="false" |
| data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies." |
| ></script> |
| |
| </div> |
| |
| </div> |
| |
| <div class="navbar-item"> |
| <div class="version-switcher__container dropdown pst-js-only"> |
| <button id="pst-version-switcher-button-3" |
| type="button" |
| class="version-switcher__button btn btn-sm dropdown-toggle" |
| data-bs-toggle="dropdown" |
| aria-haspopup="listbox" |
| aria-controls="pst-version-switcher-list-3" |
| aria-label="Version switcher list" |
| > |
| Choose version <!-- this text may get changed later by javascript --> |
| <span class="caret"></span> |
| </button> |
| <div id="pst-version-switcher-list-3" |
| class="version-switcher__menu dropdown-menu list-group-flush py-0" |
| role="listbox" aria-labelledby="pst-version-switcher-button-3"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div></div> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| <div class="navbar-item"><ul class="navbar-icon-links" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">LinkedIn</span></a> |
| </li> |
| <li class="nav-item"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i> |
| <span class="sr-only">BlueSky</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="Intro.html">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Columnar.html">Arrow Columnar Format</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Versioning.html">Format Versioning and Stability</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Changing.html">Changing the Apache Arrow Format Specification</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CanonicalExtensions.html">Canonical Extension Types</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Other.html">Other Data Structures</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="CDataInterface.html">The Arrow C data interface</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="CDataInterface/PyCapsuleInterface.html">The Arrow PyCapsule Interface</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="CStreamInterface.html">The Arrow C stream interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="CDeviceDataInterface.html">The Arrow C Device data interface</a></li> |
| <li class="toctree-l1 current active"><a class="current reference internal" href="#">Statistics schema</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="DissociatedIPC.html">Dissociated IPC Protocol</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="FlightSql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ADBC.html">ADBC: Arrow Database Connectivity</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Integration.html">Integration Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="Glossary.html">Glossary</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="index.html" class="nav-link">Specifications</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Statistics schema</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <section id="statistics-schema"> |
| <span id="id1"></span><h1>Statistics schema<a class="headerlink" href="#statistics-schema" title="Permalink to this heading">#</a></h1> |
| <div class="admonition warning"> |
| <p class="admonition-title">Warning</p> |
| <p>This specification should be considered experimental.</p> |
| </div> |
| <section id="rationale"> |
| <h2>Rationale<a class="headerlink" href="#rationale" title="Permalink to this heading">#</a></h2> |
| <p>Statistics are useful for fast query processing. Many query engines |
| use statistics to optimize their query plan.</p> |
| <p>The Apache Arrow format doesn’t have statistics, but other formats that can |
| be read as Apache Arrow data may have statistics. For example, the |
| Apache Parquet C++ implementation can read an Apache Parquet file as |
| Apache Arrow data and the Apache Parquet file may have statistics.</p> |
| <p>We standardize the representation of statistics as an Apache Arrow |
| array for ease of exchange.</p> |
| <section id="use-case"> |
| <h3>Use case<a class="headerlink" href="#use-case" title="Permalink to this heading">#</a></h3> |
| <p>One of <a class="reference internal" href="CStreamInterface.html#c-stream-interface"><span class="std std-ref">The Arrow C stream interface</span></a> use cases is the following:</p> |
| <ol class="arabic simple"> |
| <li><p>Module A reads an Apache Parquet file as Apache Arrow data.</p></li> |
| <li><p>Module A passes the read Apache Arrow data to module B through the |
| Arrow C stream interface.</p></li> |
| <li><p>Module B processes the passed Apache Arrow data.</p></li> |
| </ol> |
| <p>If module A can pass the statistics associated with the Apache Parquet |
| file to module B, module B can use the statistics to optimize its |
| query plan.</p> |
| <p>For example, DuckDB uses this approach but DuckDB couldn’t use |
| statistics because there wasn’t a standardized way to represent |
| statistics for the Apache Arrow data.</p> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p><a class="reference external" href="https://github.com/duckdb/duckdb/blob/v1.1.3/src/function/table/arrow.cpp#L373-L403">duckdb::ArrowTableFunction::ArrowScanBind() in DuckDB 1.1.3</a></p> |
| </div> |
| </section> |
| <section id="goals"> |
| <h3>Goals<a class="headerlink" href="#goals" title="Permalink to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p>Establish a standard way to represent statistics as an Apache Arrow |
| array.</p></li> |
| </ul> |
| </section> |
| <section id="non-goals"> |
| <h3>Non-goals<a class="headerlink" href="#non-goals" title="Permalink to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p>Establish a standard way to pass an Apache Arrow array that |
| represents statistics.</p></li> |
| <li><p>Establish a standard way to embed statistics into an Apache Arrow |
| array itself.</p></li> |
| </ul> |
| </section> |
| </section> |
| <section id="schema"> |
| <h2>Schema<a class="headerlink" href="#schema" title="Permalink to this heading">#</a></h2> |
| <p>This specification provides only the schema for statistics. This is |
| the canonical schema to represent statistics about an Apache Arrow |
| dataset as Apache Arrow data.</p> |
| <p>Here is the outline of the schema for statistics:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o">&lt;</span> |
| <span class="n">column</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> |
| <span class="n">statistics</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span> |
| <span class="n">key</span><span class="p">:</span> <span class="n">dictionary</span><span class="o">&lt;</span><span class="n">values</span><span class="p">:</span> <span class="n">utf8</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">int32</span><span class="o">&gt;</span><span class="p">,</span> |
| <span class="n">items</span><span class="p">:</span> <span class="n">dense_union</span><span class="o">&lt;...</span><span class="n">all</span> <span class="n">needed</span> <span class="n">types</span><span class="o">...&gt;</span> |
| <span class="o">&gt;</span> |
| <span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <p>Here are the details of the top-level <code class="docutils literal notranslate"><span class="pre">struct</span></code>:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Name</p></th> |
| <th class="head"><p>Data type</p></th> |
| <th class="head"><p>Nullable</p></th> |
| <th class="head"><p>Notes</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">column</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">int32</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">true</span></code></p></td> |
| <td><p>The zero-based column index, or null if the statistics |
| describe the whole table or record batch.</p> |
| <p>The column index is computed using the same rule as the |
| <a class="reference internal" href="Columnar.html#ipc-recordbatch-message"><span class="std std-ref">RecordBatch message</span></a>.</p> |
| </td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">statistics</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">map</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">false</span></code></p></td> |
| <td><p>Statistics for the target column, table or record batch. See |
| the separate table below for details.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Here are the details of the <code class="docutils literal notranslate"><span class="pre">map</span></code> of the <code class="docutils literal notranslate"><span class="pre">statistics</span></code>:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Key or items</p></th> |
| <th class="head"><p>Data type</p></th> |
| <th class="head"><p>Nullable</p></th> |
| <th class="head"><p>Notes</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>key</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">dictionary&lt;values:</span> <span class="pre">utf8,</span> <span class="pre">indices:</span> <span class="pre">int32&gt;</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">false</span></code></p></td> |
| <td><p>The string key is the name of the |
| statistic. Dictionary-encoding is used for efficiency as the |
| same statistic may be repeated for different columns. |
| Different keys are assigned for exact and approximate statistic |
| values. Each statistic has its own description below.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>items</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">dense_union</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">false</span></code></p></td> |
| <td><p>The statistics value is a dense union. It has at least all the |
| types needed for the statistic kinds in the keys. For example, you |
| need at least <code class="docutils literal notranslate"><span class="pre">int64</span></code> and <code class="docutils literal notranslate"><span class="pre">float64</span></code> types when you have an |
| <code class="docutils literal notranslate"><span class="pre">int64</span></code> distinct count statistic and a <code class="docutils literal notranslate"><span class="pre">float64</span></code> average |
| byte width statistic. See the description of each statistic below.</p> |
| <p>Dense union arrays have names for each field but we don’t standardize |
| field names for these because we can access the proper |
| field by type code instead. So we can use any valid name for |
| the fields.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <section id="standard-statistics"> |
| <span id="statistics-schema-name"></span><h3>Standard statistics<a class="headerlink" href="#standard-statistics" title="Permalink to this heading">#</a></h3> |
| <p>Each statistic kind has a name that appears as a key in the statistics |
| map for each column or entire table. <code class="docutils literal notranslate"><span class="pre">dictionary&lt;values:</span> <span class="pre">utf8,</span> |
| <span class="pre">indices:</span> <span class="pre">int32&gt;</span></code> is used to encode the name for space-efficiency.</p> |
| <p>We assign different names for variations of the same statistic instead |
| of using flags. For example, we assign different statistic names for |
| exact and approximate values of the “distinct count” statistic.</p> |
| <p>The colon symbol <code class="docutils literal notranslate"><span class="pre">:</span></code> is to be used as a namespace separator like |
| <a class="reference internal" href="Columnar.html#format-metadata"><span class="std std-ref">Custom Application Metadata</span></a>. It can be used multiple times in a name.</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">ARROW</span></code> prefix is a reserved namespace for pre-defined statistic |
| names in current and future versions of this specification. |
| User-defined statistics must not use it. For example, you can use your |
| product name as a namespace, such as <code class="docutils literal notranslate"><span class="pre">MY_PRODUCT:my_statistics:exact</span></code>.</p> |
| <p>Here are the pre-defined statistic names:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Name</p></th> |
| <th class="head"><p>Data type</p></th> |
| <th class="head"><p>Notes</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:average_byte_width:exact</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The average size in bytes of a row in the target |
| column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:average_byte_width:approximate</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The average size in bytes of a row in the target |
| column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:distinct_count:exact</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">int64</span></code></p></td> |
| <td><p>The number of distinct values in the target column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:distinct_count:approximate</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The number of distinct values in the target |
| column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:max_byte_width:exact</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">int64</span></code></p></td> |
| <td><p>The maximum size in bytes of a row in the target |
| column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:max_byte_width:approximate</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The maximum size in bytes of a row in the target |
| column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:max_value:exact</span></code></p></td> |
| <td><p>Target dependent</p></td> |
| <td><p>The maximum value in the target column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:max_value:approximate</span></code></p></td> |
| <td><p>Target dependent</p></td> |
| <td><p>The maximum value in the target column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:min_value:exact</span></code></p></td> |
| <td><p>Target dependent</p></td> |
| <td><p>The minimum value in the target column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:min_value:approximate</span></code></p></td> |
| <td><p>Target dependent</p></td> |
| <td><p>The minimum value in the target column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:null_count:exact</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">int64</span></code></p></td> |
| <td><p>The number of nulls in the target column. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:null_count:approximate</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The number of nulls in the target column. (approximate)</p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:row_count:exact</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">int64</span></code></p></td> |
| <td><p>The number of rows in the target table, record batch or |
| array. (exact)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">ARROW:row_count:approximate</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">float64</span></code></p></td> |
| <td><p>The number of rows in the target table, record batch or |
| array. (approximate)</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>If you find a statistic that might be useful to multiple systems, |
| please propose it on the <a class="reference external" href="https://arrow.apache.org/community/">Apache Arrow development mailing-list</a>.</p> |
| <p>Interoperability improves when producers and consumers of statistics |
| follow a previously agreed upon statistic specification.</p> |
| </section> |
| </section> |
| <section id="examples"> |
| <span id="statistics-schema-examples"></span><h2>Examples<a class="headerlink" href="#examples" title="Permalink to this heading">#</a></h2> |
| <p>Here are some examples to help you understand the statistics schema.</p> |
| <section id="simple-record-batch"> |
| <h3>Simple record batch<a class="headerlink" href="#simple-record-batch" title="Permalink to this heading">#</a></h3> |
| <p>Schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">vendor_id</span><span class="p">:</span> <span class="n">int32</span> |
| <span class="n">passenger_count</span><span class="p">:</span> <span class="n">int64</span> |
| </pre></div> |
| </div> |
| <p>Data:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">vendor_id</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span> |
| <span class="n">passenger_count</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">null</span><span class="p">]</span> |
| </pre></div> |
| </div> |
| <p>Statistics:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Target</p></th> |
| <th class="head"><p>Name</p></th> |
| <th class="head"><p>Value</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>Record batch</p></td> |
| <td><p>The number of rows</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="4"><p> <code class="docutils literal notranslate"><span class="pre">vendor_id</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="4"><p> <code class="docutils literal notranslate"><span class="pre">passenger_count</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Column indexes:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Index</p></th> |
| <th class="head"><p>Target</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">vendor_id</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">passenger_count</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Statistics schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o"><</span> |
| <span class="n">column</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> |
| <span class="n">statistics</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span> |
| <span class="n">key</span><span class="p">:</span> <span class="n">dictionary</span><span class="o"><</span><span class="n">values</span><span class="p">:</span> <span class="n">utf8</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span><span class="p">,</span> |
| <span class="n">items</span><span class="p">:</span> <span class="n">dense_union</span><span class="o"><</span><span class="mi">0</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> |
| <span class="o">></span> |
| <span class="o">></span> |
| </pre></div> |
| </div> |
| <p>Statistics array:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">column</span><span class="p">:</span> <span class="p">[</span> |
| <span class="n">null</span><span class="p">,</span> <span class="c1"># record batch</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># vendor_id</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># passenger_count</span> |
| <span class="p">]</span> |
| <span class="n">statistics</span><span class="p">:</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># record batch: 1 value: [0]</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># vendor_id: 4 values: [1, 2, 3, 4]</span> |
| <span class="mi">9</span><span class="p">,</span> <span class="c1"># passenger_count: 4 values: [5, 6, 7, 8]</span> |
| <span class="p">]</span> |
| <span class="n">key</span><span class="p">:</span> |
| <span class="n">values</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s2">"ARROW:row_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:null_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:distinct_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:exact"</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">indices</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:exact"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:exact"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:exact"</span> |
| <span class="p">]</span> |
| <span class="n">items</span><span class="p">:</span> |
| <span class="n">children</span><span class="p">:</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># int64</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># record batch: "ARROW:row_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># vendor_id: "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># vendor_id: "ARROW:distinct_count:exact"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># vendor_id: "ARROW:max_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># vendor_id: "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># passenger_count: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># passenger_count: "ARROW:distinct_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># passenger_count: "ARROW:max_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># passenger_count: "ARROW:min_value:exact"</span> |
| <span class="p">]</span> |
| <span class="n">types</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># all values are int64</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">1</span><span class="p">,</span> |
| <span class="mi">2</span><span class="p">,</span> |
| <span class="mi">3</span><span class="p">,</span> |
| <span class="mi">4</span><span class="p">,</span> |
| <span class="mi">5</span><span class="p">,</span> |
| <span class="mi">6</span><span class="p">,</span> |
| <span class="mi">7</span><span class="p">,</span> |
| <span class="mi">8</span><span class="p">,</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="complex-record-batch"> |
| <h3>Complex record batch<a class="headerlink" href="#complex-record-batch" title="Permalink to this heading">#</a></h3> |
| <p>This uses nested types.</p> |
| <p>Schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">col1</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">a</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span class="n">c</span><span class="p">:</span> <span class="n">float64</span><span class="o">></span> |
| <span class="n">col2</span><span class="p">:</span> <span class="n">utf8</span> |
| </pre></div> |
| </div> |
| <p>Data:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">col1</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">],</span> <span class="n">c</span><span class="p">:</span> <span class="mf">2.9</span><span class="p">},</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="n">null</span><span class="p">,</span> <span class="n">c</span><span class="p">:</span> <span class="o">-</span><span class="mf">2.9</span><span class="p">},</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="p">[</span><span class="mi">99</span><span class="p">],</span> <span class="n">c</span><span class="p">:</span> <span class="n">null</span><span class="p">},</span> |
| <span class="p">]</span> |
| <span class="n">col2</span><span class="p">:</span> <span class="p">[</span><span class="s2">"x"</span><span class="p">,</span> <span class="n">null</span><span class="p">,</span> <span class="s2">"z"</span><span class="p">]</span> |
| </pre></div> |
| </div> |
| <p>Statistics:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Target</p></th> |
| <th class="head"><p>Name</p></th> |
| <th class="head"><p>Value</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>Record batch</p></td> |
| <td><p>The number of rows</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">col1</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td rowspan="4"><p> <code class="docutils literal notranslate"><span class="pre">col1.a</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The approximate max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The approximate min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">col1.b</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="2"><p> <code class="docutils literal notranslate"><span class="pre">col1.b.item</span></code></p></td> |
| <td><p>The max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">99</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">20</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="3"><p> <code class="docutils literal notranslate"><span class="pre">col1.c</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The approximate max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3.0</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The approximate min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">-3.0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td rowspan="2"><p> <code class="docutils literal notranslate"><span class="pre">col2</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Column indexes:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Index</p></th> |
| <th class="head"><p>Target</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col1</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col1.a</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col1.b</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col1.b.item</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">4</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col1.c</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">col2</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>See also <a class="reference internal" href="Columnar.html#ipc-recordbatch-message"><span class="std std-ref">RecordBatch message</span></a> for how to compute column indexes.</p> |
| <p>Statistics schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o"><</span> |
| <span class="n">column</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> |
| <span class="n">statistics</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span> |
| <span class="n">key</span><span class="p">:</span> <span class="n">dictionary</span><span class="o"><</span><span class="n">values</span><span class="p">:</span> <span class="n">utf8</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span><span class="p">,</span> |
| <span class="n">items</span><span class="p">:</span> <span class="n">dense_union</span><span class="o"><</span> |
| <span class="c1"># For the number of rows, the number of nulls and so on.</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> |
| <span class="c1"># For the max/min values of col1.c.</span> |
| <span class="mi">1</span><span class="p">:</span> <span class="n">float64</span> |
| <span class="o">></span> |
| <span class="o">></span> |
| <span class="o">></span> |
| </pre></div> |
| </div> |
| <p>Statistics array:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">column</span><span class="p">:</span> <span class="p">[</span> |
| <span class="n">null</span><span class="p">,</span> <span class="c1"># record batch</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># col1</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># col1.a</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># col1.b</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># col1.b.item</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># col1.c</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># col2</span> |
| <span class="p">]</span> |
| <span class="n">statistics</span><span class="p">:</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># record batch: 1 value: [0]</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># col1: 1 value: [1]</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># col1.a: 4 values: [2, 3, 4, 5]</span> |
| <span class="mi">7</span><span class="p">,</span> <span class="c1"># col1.b: 1 value: [6]</span> |
| <span class="mi">9</span><span class="p">,</span> <span class="c1"># col1.b.item: 2 values: [7, 8]</span> |
| <span class="mi">12</span><span class="p">,</span> <span class="c1"># col1.c: 3 values: [9, 10, 11]</span> |
| <span class="mi">14</span><span class="p">,</span> <span class="c1"># col2: 2 values: [12, 13]</span> |
| <span class="p">]</span> |
| <span class="n">key</span><span class="p">:</span> |
| <span class="n">values</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s2">"ARROW:row_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:null_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:distinct_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:approximate"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:approximate"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:exact"</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">indices</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:approximate"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:exact"</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:approximate"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="p">]</span> |
| <span class="n">items</span><span class="p">:</span> |
| <span class="n">children</span><span class="p">:</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># int64</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># record batch: "ARROW:row_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># col1: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># col1.a: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># col1.a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># col1.a: "ARROW:max_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># col1.a: "ARROW:min_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># col1.b: "ARROW:null_count:exact"</span> |
| <span class="mi">99</span><span class="p">,</span> <span class="c1"># col1.b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">20</span><span class="p">,</span> <span class="c1"># col1.b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># col1.c: "ARROW:null_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># col2: "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># col2: "ARROW:distinct_count:exact"</span> |
| <span class="p">]</span> |
| <span class="mi">1</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># float64</span> |
| <span class="mf">3.0</span><span class="p">,</span> <span class="c1"># col1.c: "ARROW:max_value:approximate"</span> |
| <span class="o">-</span><span class="mf">3.0</span><span class="p">,</span> <span class="c1"># col1.c: "ARROW:min_value:approximate"</span> |
| <span class="p">]</span> |
| <span class="n">types</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: record batch: "ARROW:row_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:max_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:min_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.b: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col1.c: "ARROW:null_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: col1.c: "ARROW:max_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: col1.c: "ARROW:min_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col2: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: col2: "ARROW:distinct_count:exact"</span> |
| <span class="p">]</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: record batch: "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># int64: col1: "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:max_value:approximate"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># int64: col1.a: "ARROW:min_value:approximate"</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># int64: col1.b: "ARROW:null_count:exact"</span> |
| <span class="mi">7</span><span class="p">,</span> <span class="c1"># int64: col1.b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">8</span><span class="p">,</span> <span class="c1"># int64: col1.b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">9</span><span class="p">,</span> <span class="c1"># int64: col1.c: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># float64: col1.c: "ARROW:max_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: col1.c: "ARROW:min_value:approximate"</span> |
| <span class="mi">10</span><span class="p">,</span> <span class="c1"># int64: col2: "ARROW:null_count:exact"</span> |
| <span class="mi">11</span><span class="p">,</span> <span class="c1"># int64: col2: "ARROW:distinct_count:exact"</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="simple-array"> |
| <h3>Simple array<a class="headerlink" href="#simple-array" title="Permalink to this heading">#</a></h3> |
| <p>Schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">int64</span> |
| </pre></div> |
| </div> |
| <p>Data:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">null</span><span class="p">]</span> |
| </pre></div> |
| </div> |
| <p>Statistics:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Target</p></th> |
| <th class="head"><p>Name</p></th> |
| <th class="head"><p>Value</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td rowspan="5"><p> Array</p></td> |
| <td><p>The number of rows</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Column indexes:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Index</p></th> |
| <th class="head"><p>Target</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| <td><p>Array</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Statistics schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o">&lt;</span> |
| <span class="n">column</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> |
| <span class="n">statistics</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span> |
| <span class="n">key</span><span class="p">:</span> <span class="n">dictionary</span><span class="o">&lt;</span><span class="n">values</span><span class="p">:</span> <span class="n">utf8</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">int32</span><span class="o">&gt;</span><span class="p">,</span> |
| <span class="n">items</span><span class="p">:</span> <span class="n">dense_union</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">:</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <span class="o">&gt;</span> |
| <span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <p>Statistics array:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">column</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># array</span> |
| <span class="p">]</span> |
| <span class="n">statistics</span><span class="p">:</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># array: 5 values: [0, 1, 2, 3, 4]</span> |
| <span class="p">]</span> |
| <span class="n">key</span><span class="p">:</span> |
| <span class="n">values</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s2">"ARROW:row_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:null_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:distinct_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:exact"</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">indices</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:exact"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:exact"</span> |
| <span class="p">]</span> |
| <span class="n">items</span><span class="p">:</span> |
| <span class="n">children</span><span class="p">:</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># int64</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># array: "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># array: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># array: "ARROW:distinct_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># array: "ARROW:max_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># array: "ARROW:min_value:exact"</span> |
| <span class="p">]</span> |
| <span class="n">types</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># all values are int64</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">1</span><span class="p">,</span> |
| <span class="mi">2</span><span class="p">,</span> |
| <span class="mi">3</span><span class="p">,</span> |
| <span class="mi">4</span><span class="p">,</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="complex-array"> |
| <h3>Complex array<a class="headerlink" href="#complex-array" title="Permalink to this heading">#</a></h3> |
| <p>This uses nested types.</p> |
| <p>Schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o">&lt;</span><span class="n">a</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="nb">list</span><span class="o">&lt;</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">&gt;</span><span class="p">,</span> <span class="n">c</span><span class="p">:</span> <span class="n">float64</span><span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <p>Data:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">],</span> <span class="n">c</span><span class="p">:</span> <span class="mf">2.9</span><span class="p">},</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="n">null</span><span class="p">,</span> <span class="n">c</span><span class="p">:</span> <span class="o">-</span><span class="mf">2.9</span><span class="p">},</span> |
| <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="n">b</span><span class="p">:</span> <span class="p">[</span><span class="mi">99</span><span class="p">],</span> <span class="n">c</span><span class="p">:</span> <span class="n">null</span><span class="p">},</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| <p>Statistics:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Target</p></th> |
| <th class="head"><p>Name</p></th> |
| <th class="head"><p>Value</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td rowspan="2"><p> Array</p></td> |
| <td><p>The number of rows</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td rowspan="4"><p> <code class="docutils literal notranslate"><span class="pre">a</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The number of distinct values</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The approximate max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">5</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The approximate min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">b</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="2"><p> <code class="docutils literal notranslate"><span class="pre">b.item</span></code></p></td> |
| <td><p>The max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">99</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">20</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td rowspan="3"><p> <code class="docutils literal notranslate"><span class="pre">c</span></code></p></td> |
| <td><p>The number of nulls</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>The approximate max value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">3.0</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>The approximate min value</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">-3.0</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>Column indexes:</p> |
| <div class="pst-scrollable-table-container"><table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Index</p></th> |
| <th class="head"><p>Target</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">0</span></code></p></td> |
| <td><p>Array</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">1</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">a</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">2</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">b</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">3</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">b.item</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">4</span></code></p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">c</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <p>See also <a class="reference internal" href="Columnar.html#ipc-recordbatch-message"><span class="std std-ref">RecordBatch message</span></a> for how to compute column indexes.</p> |
| <p>Statistics schema:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">struct</span><span class="o">&lt;</span> |
| <span class="n">column</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> |
| <span class="n">statistics</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span> |
| <span class="n">key</span><span class="p">:</span> <span class="n">dictionary</span><span class="o">&lt;</span><span class="n">values</span><span class="p">:</span> <span class="n">utf8</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">int32</span><span class="o">&gt;</span><span class="p">,</span> |
| <span class="n">items</span><span class="p">:</span> <span class="n">dense_union</span><span class="o">&lt;</span> |
| <span class="c1"># For the number of rows, the number of nulls and so on.</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> |
| <span class="c1"># For the max/min values of c.</span> |
| <span class="mi">1</span><span class="p">:</span> <span class="n">float64</span> |
| <span class="o">&gt;</span> |
| <span class="o">&gt;</span> |
| <span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <p>Statistics array:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">column</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># array</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># a</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># b</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># b.item</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># c</span> |
| <span class="p">]</span> |
| <span class="n">statistics</span><span class="p">:</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># array: 2 values: [0, 1]</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># a: 4 values: [2, 3, 4, 5]</span> |
| <span class="mi">7</span><span class="p">,</span> <span class="c1"># b: 1 value: [6]</span> |
| <span class="mi">9</span><span class="p">,</span> <span class="c1"># b.item: 2 values: [7, 8]</span> |
| <span class="mi">12</span><span class="p">,</span> <span class="c1"># c: 3 values: [9, 10, 11]</span> |
| <span class="p">]</span> |
| <span class="n">key</span><span class="p">:</span> |
| <span class="n">values</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s2">"ARROW:row_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:null_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:distinct_count:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:approximate"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:approximate"</span><span class="p">,</span> |
| <span class="s2">"ARROW:max_value:exact"</span><span class="p">,</span> |
| <span class="s2">"ARROW:min_value:exact"</span><span class="p">,</span> |
| <span class="p">]</span> |
| <span class="n">indices</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># "ARROW:distinct_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:approximate"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:exact"</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># "ARROW:max_value:approximate"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># "ARROW:min_value:approximate"</span> |
| <span class="p">]</span> |
| <span class="n">items</span><span class="p">:</span> |
| <span class="n">children</span><span class="p">:</span> |
| <span class="mi">0</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># int64</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># array: "ARROW:row_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># array: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># a: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># a: "ARROW:max_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># a: "ARROW:min_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># b: "ARROW:null_count:exact"</span> |
| <span class="mi">99</span><span class="p">,</span> <span class="c1"># b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">20</span><span class="p">,</span> <span class="c1"># b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># c: "ARROW:null_count:exact"</span> |
| <span class="p">]</span> |
| <span class="mi">1</span><span class="p">:</span> <span class="p">[</span> <span class="c1"># float64</span> |
| <span class="mf">3.0</span><span class="p">,</span> <span class="c1"># c: "ARROW:max_value:approximate"</span> |
| <span class="o">-</span><span class="mf">3.0</span><span class="p">,</span> <span class="c1"># c: "ARROW:min_value:approximate"</span> |
| <span class="p">]</span> |
| <span class="n">types</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: array: "ARROW:row_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: array: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:max_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:min_value:approximate"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: b: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: c: "ARROW:null_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: c: "ARROW:max_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: c: "ARROW:min_value:approximate"</span> |
| <span class="p">]</span> |
| <span class="n">offsets</span><span class="p">:</span> <span class="p">[</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># int64: array: "ARROW:row_count:exact"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># int64: array: "ARROW:null_count:exact"</span> |
| <span class="mi">2</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:null_count:exact"</span> |
| <span class="mi">3</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:distinct_count:exact"</span> |
| <span class="mi">4</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:max_value:approximate"</span> |
| <span class="mi">5</span><span class="p">,</span> <span class="c1"># int64: a: "ARROW:min_value:approximate"</span> |
| <span class="mi">6</span><span class="p">,</span> <span class="c1"># int64: b: "ARROW:null_count:exact"</span> |
| <span class="mi">7</span><span class="p">,</span> <span class="c1"># int64: b.item: "ARROW:max_value:exact"</span> |
| <span class="mi">8</span><span class="p">,</span> <span class="c1"># int64: b.item: "ARROW:min_value:exact"</span> |
| <span class="mi">9</span><span class="p">,</span> <span class="c1"># int64: c: "ARROW:null_count:exact"</span> |
| <span class="mi">0</span><span class="p">,</span> <span class="c1"># float64: c: "ARROW:max_value:approximate"</span> |
| <span class="mi">1</span><span class="p">,</span> <span class="c1"># float64: c: "ARROW:min_value:approximate"</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="CDeviceDataInterface.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">The Arrow C Device data interface</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="DissociatedIPC.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Dissociated IPC Protocol</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rationale">Rationale</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#use-case">Use case</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#goals">Goals</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#non-goals">Non-goals</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#schema">Schema</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#standard-statistics">Standard statistics</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#examples">Examples</a><ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#simple-record-batch">Simple record batch</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#complex-record-batch">Complex record batch</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#simple-array">Simple array</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#complex-array">Complex array</a></li> |
| </ul> |
| </li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/format/StatisticsSchema.rst"> |
| <i class="fa-solid fa-pencil"></i> |
| |
| |
| |
| Edit on GitHub |
| |
| |
| </a> |
| </div> |
| </div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script> |
| <script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2016-2025 Apache Software Foundation. |
| Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| <!-- # L10n: Setting the PST URL as an argument as this does not need to be localized --> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.1. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |