<!-- blob: a2c254c691e59e273e790f621bd013de3485e131 [file] -->
<!DOCTYPE html>
<html lang="en" data-content_root="../" >
<head>
<meta charset="utf-8" />
<!-- Single viewport declaration; the page previously repeated this tag three times. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Quickstart &#8212; Apache DataFusion Java documentation</title>
<!--
Copy the saved "mode" and "theme" values from localStorage onto the root
element as data attributes (empty string when nothing is stored). This runs
before the stylesheets below are requested.
-->
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
This gives us a CSS class (pst-js-only) that is hidden only when JavaScript is disabled.
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<!-- So that users can add custom icons -->
<script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'user-guide/quickstart';</script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="SessionContext" href="sessioncontext.html" />
<link rel="prev" title="Installation" href="installation.html" />
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<p class="title logo__title">Apache DataFusion Java documentation</p>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion-java">
GitHub Repository
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion-java/issues">
Issue Tracker
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://datafusion.apache.org/">
Apache DataFusion
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">
Code of Conduct
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
User Guide
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button"
data-bs-toggle="dropdown" aria-expanded="false"
aria-controls="pst-nav-more-links">
More
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class=" ">
<a class="nav-link dropdown-item nav-internal" href="../contributor-guide/index.html">
Contributor Guide
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion-java">
GitHub Repository
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion-java/issues">
Issue Tracker
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://datafusion.apache.org/">
Apache DataFusion
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">
Code of Conduct
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="index.html">
User Guide
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../contributor-guide/index.html">
Contributor Guide
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Quickstart</a></li>
<li class="toctree-l1"><a class="reference internal" href="sessioncontext.html">SessionContext</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataframe.html">DataFrame and SQL</a></li>
<li class="toctree-l1"><a class="reference internal" href="parquet.html">Parquet</a></li>
<li class="toctree-l1"><a class="reference internal" href="proto-plans.html">Logical plans via datafusion-proto</a></li>
<li class="toctree-l1"><a class="reference internal" href="scalar-udf.html">Scalar UDFs</a></li>
<li class="toctree-l1"><a class="reference internal" href="table-provider.html">Java table providers</a></li>
<li class="toctree-l1"><a class="reference internal" href="api-reference.html">API Reference</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
<div class="sidebar-primary-item">
<div id="ethical-ad-placement"
class="flat"
data-ea-publisher="readthedocs"
data-ea-type="readthedocs-sidebar"
data-ea-manual="true">
</div></div>
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">User Guide</a></li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Quickstart</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<section id="quickstart">
<h1>Quickstart<a class="headerlink" href="#quickstart" title="Link to this heading">#</a></h1>
<p>This page walks through a complete query end-to-end.</p>
<section id="the-full-example">
<h2>The full example<a class="headerlink" href="#the-full-example" title="Link to this heading">#</a></h2>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.datafusion.DataFrame</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.datafusion.SessionContext</span><span class="p">;</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="kd">var</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="na">registerParquet</span><span class="p">(</span><span class="s">&quot;orders&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;/path/to/orders.parquet&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">DataFrame</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="na">sql</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;SELECT o_orderpriority, COUNT(*) AS n &quot;</span><span class="w"> </span><span class="o">+</span>
<span class="w"> </span><span class="s">&quot;FROM orders GROUP BY o_orderpriority&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="na">collect</span><span class="p">(</span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="kd">var</span><span class="w"> </span><span class="n">batch</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// ...</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="walkthrough">
<h2>Walkthrough<a class="headerlink" href="#walkthrough" title="Link to this heading">#</a></h2>
<p><strong>Allocator.</strong> <code class="docutils literal notranslate"><span class="pre">RootAllocator</span></code> is the Arrow off-heap memory allocator. Every
JVM-side Arrow buffer is tracked under an allocator; when the allocator is
closed, leaked buffers are reported. Use one allocator per query (or one
per application) and close it with a <code class="docutils literal notranslate"><span class="pre">try</span></code>-with-resources statement.</p>
<p><strong>Session context.</strong> <code class="docutils literal notranslate"><span class="pre">SessionContext</span></code> is the entry point into DataFusion. It
holds the catalog of registered tables and the query planner. It is
<code class="docutils literal notranslate"><span class="pre">AutoCloseable</span></code> and <strong>not thread-safe</strong> — use one per thread, or guard
access externally.</p>
<p><strong>Registering data.</strong> <code class="docutils literal notranslate"><span class="pre">registerParquet(name,</span> <span class="pre">path)</span></code> reads the file’s footer
when it is called and exposes the file as a table under the given name. See
<a class="reference internal" href="parquet.html"><span class="std std-doc">Parquet</span></a> for the options form.</p>
<p><strong>SQL.</strong> <code class="docutils literal notranslate"><span class="pre">ctx.sql(&quot;...&quot;)</span></code> plans the query and returns a <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code>. The
query is not executed until results are pulled.</p>
<p><strong>Collecting results.</strong> <code class="docutils literal notranslate"><span class="pre">df.collect(allocator)</span></code> starts native execution and
returns an <code class="docutils literal notranslate"><span class="pre">ArrowReader</span></code>. Each <code class="docutils literal notranslate"><span class="pre">loadNextBatch()</span></code> call pulls the next
<code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>; iterate until it returns <code class="docutils literal notranslate"><span class="pre">false</span></code>.</p>
<p><strong>Cleanup.</strong> Both <code class="docutils literal notranslate"><span class="pre">SessionContext</span></code> and <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> are <code class="docutils literal notranslate"><span class="pre">AutoCloseable</span></code>. Use
<code class="docutils literal notranslate"><span class="pre">try</span></code>-with-resources, as the example does for the allocator and reader as well, so native
resources and Arrow buffers are released even if an exception is thrown.</p>
</section>
</section>
</article>
<footer class="prev-next-footer d-print-none">
<div class="prev-next-area">
<a class="left-prev"
href="installation.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Installation</p>
</div>
</a>
<a class="right-next"
href="sessioncontext.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">SessionContext</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-full-example">The full example</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#walkthrough">Walkthrough</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
<li><a href="../_sources/user-guide/quickstart.md.txt"
rel="nofollow">Show Source</a></li>
</ul>
</div></div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2026, Apache Software Foundation.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.4.7.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
<!-- # L10n: Setting the PST URL as an argument as this does not need to be localized -->
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.16.1.
</p></div>
</div>
</div>
</footer>
</body>
</html>