| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="../" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>Quickstart — Apache DataFusion Java documentation</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; |
| document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; |
| </script> |
| <!-- |
| this give us a css class that will be invisible only if js is disabled |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" /> |
| |
| <!-- So that users can add custom icons --> |
| <script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" /> |
| |
| <script src="../_static/documentation_options.js?v=5929fcd5"></script> |
| <script src="../_static/doctools.js?v=9a2dae69"></script> |
| <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'user-guide/quickstart';</script> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="SessionContext" href="sessioncontext.html" /> |
| <link rel="prev" title="Installation" href="installation.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="" /> |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class="col-lg-3 navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| <p class="title logo__title">Apache DataFusion Java documentation</p> |
| |
| </a></div> |
| |
| </div> |
| |
| <div class="col-lg-9 navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion-java"> |
| GitHub Repository |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion-java/issues"> |
| Issue Tracker |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://datafusion.apache.org/"> |
| Apache DataFusion |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md"> |
| Code of Conduct |
| </a> |
| </li> |
| |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| User Guide |
| </a> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <button class="btn dropdown-toggle nav-item" type="button" |
| data-bs-toggle="dropdown" aria-expanded="false" |
| aria-controls="pst-nav-more-links"> |
| More |
| </button> |
| <ul id="pst-nav-more-links" class="dropdown-menu"> |
| |
| <li class=" "> |
| <a class="nav-link dropdown-item nav-internal" href="../contributor-guide/index.html"> |
| Contributor Guide |
| </a> |
| </li> |
| |
| </ul> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| |
| |
| <div class="navbar-item"> |
| <nav> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion-java"> |
| GitHub Repository |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion-java/issues"> |
| Issue Tracker |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://datafusion.apache.org/"> |
| Apache DataFusion |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md"> |
| Code of Conduct |
| </a> |
| </li> |
| |
| |
| <li class="nav-item current active"> |
| <a class="nav-link nav-internal" href="index.html"> |
| User Guide |
| </a> |
| </li> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link nav-internal" href="../contributor-guide/index.html"> |
| Contributor Guide |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <nav class="bd-docs-nav bd-links" |
| aria-label="Section Navigation"> |
| <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p> |
| <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li> |
| <li class="toctree-l1 current active"><a class="current reference internal" href="#">Quickstart</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="sessioncontext.html">SessionContext</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="dataframe.html">DataFrame and SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="parquet.html">Parquet</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="proto-plans.html">Logical plans via datafusion-proto</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="scalar-udf.html">Scalar UDFs</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="table-provider.html">Java table providers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="api-reference.html">API Reference</a></li> |
| </ul> |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="index.html" class="nav-link">User Guide</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Quickstart</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <section id="quickstart"> |
| <h1>Quickstart<a class="headerlink" href="#quickstart" title="Link to this heading">#</a></h1> |
| <p>This page walks through a complete query end-to-end.</p> |
| <section id="the-full-example"> |
| <h2>The full example<a class="headerlink" href="#the-full-example" title="Link to this heading">#</a></h2> |
| <div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowReader</span><span class="p">;</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.datafusion.DataFrame</span><span class="p">;</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.datafusion.SessionContext</span><span class="p">;</span> |
| |
| <span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="kd">var</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span> |
| <span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">())</span><span class="w"> </span><span class="p">{</span> |
| |
| <span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="na">registerParquet</span><span class="p">(</span><span class="s">"orders"</span><span class="p">,</span><span class="w"> </span><span class="s">"/path/to/orders.parquet"</span><span class="p">);</span> |
| |
| <span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">DataFrame</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="na">sql</span><span class="p">(</span> |
| <span class="w"> </span><span class="s">"SELECT o_orderpriority, COUNT(*) AS n "</span><span class="w"> </span><span class="o">+</span> |
| <span class="w"> </span><span class="s">"FROM orders GROUP BY o_orderpriority"</span><span class="p">);</span> |
| <span class="w"> </span><span class="n">ArrowReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="na">collect</span><span class="p">(</span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">())</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="n">batch</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span> |
| <span class="w"> </span><span class="c1">// ...</span> |
| <span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="walkthrough"> |
| <h2>Walkthrough<a class="headerlink" href="#walkthrough" title="Link to this heading">#</a></h2> |
| <p><strong>Allocator.</strong> <code class="docutils literal notranslate"><span class="pre">RootAllocator</span></code> is the Arrow off-heap memory allocator. Every |
| JVM-side Arrow buffer is tracked under an allocator; when the allocator is |
| closed, leaked buffers are reported. Use one allocator per query (or one |
| per application) and close it in a <code class="docutils literal notranslate"><span class="pre">try</span></code>-with-resources.</p> |
| <p><strong>Session context.</strong> <code class="docutils literal notranslate"><span class="pre">SessionContext</span></code> is the entry point into DataFusion. It |
| holds the catalog of registered tables and the query planner. It is |
| <code class="docutils literal notranslate"><span class="pre">AutoCloseable</span></code> and <strong>not thread-safe</strong> — use one per thread, or guard |
| access externally.</p> |
| <p><strong>Registering data.</strong> <code class="docutils literal notranslate"><span class="pre">registerParquet(name,</span> <span class="pre">path)</span></code> reads the file’s footer |
| on call and exposes it under the given table name. See |
| <a class="reference internal" href="parquet.html"><span class="std std-doc">Parquet</span></a> for the options form.</p> |
| <p><strong>SQL.</strong> <code class="docutils literal notranslate"><span class="pre">ctx.sql("...")</span></code> plans the query and returns a <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code>. The |
| query is not executed until results are pulled.</p> |
| <p><strong>Collecting results.</strong> <code class="docutils literal notranslate"><span class="pre">df.collect(allocator)</span></code> starts native execution and |
| returns an <code class="docutils literal notranslate"><span class="pre">ArrowReader</span></code>. Each <code class="docutils literal notranslate"><span class="pre">loadNextBatch()</span></code> call pulls the next |
| <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>; iterate until it returns <code class="docutils literal notranslate"><span class="pre">false</span></code>.</p> |
| <p><strong>Cleanup.</strong> Both <code class="docutils literal notranslate"><span class="pre">SessionContext</span></code> and <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> are <code class="docutils literal notranslate"><span class="pre">AutoCloseable</span></code>. Use |
| <code class="docutils literal notranslate"><span class="pre">try</span></code>-with-resources so native resources and Arrow buffers are released |
| even on exception.</p> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="installation.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Installation</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="sessioncontext.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">SessionContext</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-full-example">The full example</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#walkthrough">Walkthrough</a></li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| <div role="note" aria-label="source link"> |
| <h3>This Page</h3> |
| <ul class="this-page-menu"> |
| <li><a href="../_sources/user-guide/quickstart.md.txt" |
| rel="nofollow">Show Source</a></li> |
| </ul> |
| </div></div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script> |
| <script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"> |
| |
| <p class="copyright"> |
| |
| © Copyright 2026, Apache Software Foundation. |
| <br/> |
| |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.4.7. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"> |
| <p class="theme-version"> |
| <!-- # L10n: Setting the PST URL as an argument as this does not need to be localized --> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.1. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |