blob: 8f82069a066990f62cd7cdb7fce5f850115f79be [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="../" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Using the SQL API &#8212; Apache DataFusion documentation</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
this give us a css class that will be invisible only if js is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css?v=d08b24aa" />
<!-- So that users can add custom icons -->
<script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../_static/doctools.js?v=fd6eb6e6"></script>
<script src="../_static/sphinx_highlight.js?v=6ffebe34"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'library-user-guide/using-the-sql-api';</script>
<link rel="icon" href="../_static/favicon.svg"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Extending SQL Syntax" href="extending-sql.html" />
<link rel="prev" title="Extensions List" href="extensions.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/original.svg" class="logo__image only-light" alt="Apache DataFusion documentation - Home"/>
<img src="../_static/original_dark.svg" class="logo__image only-dark pst-js-only" alt="Apache DataFusion documentation - Home"/>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__end">
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<p aria-level="2" class="caption" role="heading"><span class="caption-text">ASF Links</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference external" href="https://apache.org">Apache Software Foundation</a></li>
<li class="toctree-l1"><a class="reference external" href="https://www.apache.org/licenses/">License</a></li>
<li class="toctree-l1"><a class="reference external" href="https://www.apache.org/foundation/sponsorship.html">Donate</a></li>
<li class="toctree-l1"><a class="reference external" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li class="toctree-l1"><a class="reference external" href="https://www.apache.org/security/">Security</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Links</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/datafusion">GitHub and Issue Tracker</a></li>
<li class="toctree-l1"><a class="reference external" href="https://crates.io/crates/datafusion">crates.io</a></li>
<li class="toctree-l1"><a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">API Docs</a></li>
<li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/blog/">Blog</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">Code of conduct</a></li>
<li class="toctree-l1"><a class="reference internal" href="../download.html">Download</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">User Guide</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../user-guide/introduction.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/example-usage.html">Example Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/features.html">Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/concepts-readings-events.html">Concepts, Readings, Events</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/crate-configuration.html">Crate Configuration</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../user-guide/cli/index.html">DataFusion CLI</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/cli/overview.html">Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/cli/installation.html">Installation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/cli/usage.html">Usage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/cli/datasources.html">Local Files / Directories</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/cli/functions.html">CLI Specific Functions</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/dataframe.html">DataFrame API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/arrow-introduction.html">Gentle Arrow Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/expressions.html">Expression API</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../user-guide/sql/index.html">SQL Reference</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/data_types.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/struct_coercion.html">Struct Type Coercion and Field Mapping</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/select.html">SELECT syntax</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/subqueries.html">Subqueries</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/ddl.html">DDL</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/dml.html">DML</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/explain.html">EXPLAIN</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/information_schema.html">Information Schema</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/operators.html">Operators and Literals</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/aggregate_functions.html">Aggregate Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/window_functions.html">Window Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/scalar_functions.html">Scalar Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/special_functions.html">Special Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/format_options.html">Format Options</a></li>
<li class="toctree-l2"><a class="reference internal" href="../user-guide/sql/prepared_statements.html">Prepared Statements</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/configs.html">Configuration Settings</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/explain-usage.html">Reading Explain Plans</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="../user-guide/faq.html">Frequently Asked Questions</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Library User Guide</span></p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="index.html">Introduction</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="upgrading/index.html">Upgrade Guides</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="upgrading/54.0.0.html">DataFusion 54.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/53.0.0.html">DataFusion 53.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/52.0.0.html">DataFusion 52.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/51.0.0.html">DataFusion 51.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/50.0.0.html">DataFusion 50.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/49.0.0.html">DataFusion 49.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/48.0.1.html">DataFusion 48.0.1</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/48.0.0.html">DataFusion 48.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/47.0.0.html">DataFusion 47.0.0</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading/46.0.0.html">DataFusion 46.0.0</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="extensions.html">Extensions List</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Using the SQL API</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending-sql.html">Extending SQL Syntax</a></li>
<li class="toctree-l1"><a class="reference internal" href="working-with-exprs.html">Working with <code class="docutils literal notranslate"><span class="pre">Expr</span></code>s</a></li>
<li class="toctree-l1"><a class="reference internal" href="using-the-dataframe-api.html">Using the DataFrame API</a></li>
<li class="toctree-l1"><a class="reference internal" href="building-logical-plans.html">Building Logical Plans</a></li>
<li class="toctree-l1"><a class="reference internal" href="catalogs.html">Catalogs, Schemas, and Tables</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="functions/index.html">Functions</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="functions/adding-udfs.html">Adding User Defined Functions: Scalar/Window/Aggregate/Table Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions/spark.html">Spark Compatible Functions</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="custom-table-providers.html">Custom Table Provider</a></li>
<li class="toctree-l1"><a class="reference internal" href="table-constraints.html">Table Constraint Enforcement</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending-operators.html">Extending Operators</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiling.html">Profiling Cookbook</a></li>
<li class="toctree-l1"><a class="reference internal" href="query-optimizer.html">Query Optimizer</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Contributor Guide</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/index.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/communication.html">Community Communication</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/development_environment.html">Development Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/architecture.html">Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/architecture/dependency-graph.html">Workspace Dependency Graph</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/testing.html">Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/api-health.html">API health policy</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/howtos.html">HOWTOs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/roadmap.html">Roadmap and Improvement Proposals</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/governance.html">Governance</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributor-guide/inviting.html">Inviting New Committers and PMC Members</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../contributor-guide/specification/index.html">Specifications</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../contributor-guide/specification/invariants.html">Invariants</a></li>
<li class="toctree-l2"><a class="reference internal" href="../contributor-guide/specification/output-field-name-semantic.html">Output field name semantics</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../contributor-guide/gsoc/index.html">Google Summer of Code (GSOC)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../contributor-guide/gsoc/gsoc_application_guidelines_2025.html">GSoC Application Guidelines (2025)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../contributor-guide/gsoc/gsoc_project_ideas_2025.html">GSoC Project Ideas (2025)</a></li>
</ul>
</details></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">DataFusion Subprojects</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/ballista/">DataFusion Ballista</a></li>
<li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/comet/">DataFusion Comet</a></li>
<li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/python/">DataFusion Python</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
<div class="sidebar-primary-item">
<div id="ethical-ad-placement"
class="flat"
data-ea-publisher="readthedocs"
data-ea-type="readthedocs-sidebar"
data-ea-manual="true">
</div></div>
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Using the SQL API</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<section id="using-the-sql-api">
<h1>Using the SQL API<a class="headerlink" href="#using-the-sql-api" title="Link to this heading">#</a></h1>
<p>DataFusion has a full SQL API that allows you to interact with DataFusion using
SQL query strings. The simplest way to use the SQL API is to use the
<a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html"><code class="docutils literal notranslate"><span class="pre">SessionContext</span></code></a> struct which provides the highest-level API for executing SQL
queries.</p>
<p>To use SQL, you first register your data as a table and then run queries
using the <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql"><code class="docutils literal notranslate"><span class="pre">SessionContext::sql</span></code></a> method. For lower level control such as
preventing DDL, you can use <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql_with_options"><code class="docutils literal notranslate"><span class="pre">SessionContext::sql_with_options</span></code></a> or the
<a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html"><code class="docutils literal notranslate"><span class="pre">SessionState</span></code></a> APIs</p>
<section id="registering-data-sources-using-sessioncontext-register">
<h2>Registering Data Sources using <code class="docutils literal notranslate"><span class="pre">SessionContext::register*</span></code><a class="headerlink" href="#registering-data-sources-using-sessioncontext-register" title="Link to this heading">#</a></h2>
<p>The <code class="docutils literal notranslate"><span class="pre">SessionContext::register*</span></code> methods tell DataFusion the name of
the source and how to read data. Once registered, you can execute SQL queries
using the <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql"><code class="docutils literal notranslate"><span class="pre">SessionContext::sql</span></code></a> method referring to your data source as a table.</p>
<p>The <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql"><code class="docutils literal notranslate"><span class="pre">SessionContext::sql</span></code></a> method returns a <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> for ease of
use. See the <a class="reference internal" href="using-the-dataframe-api.html"><span class="std std-doc">“Using the DataFrame API”</span></a> section for more information on how to
work with DataFrames.</p>
<section id="read-a-csv-file">
<h3>Read a CSV File<a class="headerlink" href="#read-a-csv-file" title="Link to this heading">#</a></h3>
<div class="highlight-rust notranslate"><div class="highlight"><pre><span></span><span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">error</span><span class="p">::</span><span class="nb">Result</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">prelude</span><span class="p">::</span><span class="o">*</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">arrow</span><span class="p">::</span><span class="n">record_batch</span><span class="p">::</span><span class="n">RecordBatch</span><span class="p">;</span>
<span class="cp">#[tokio::main]</span>
<span class="k">async</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">::</span><span class="n">new</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// register the &quot;example&quot; table</span>
<span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">register_csv</span><span class="p">(</span><span class="s">&quot;example&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;tests/data/example.csv&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">CsvReadOptions</span><span class="p">::</span><span class="n">new</span><span class="p">()).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// create a plan to run a SQL query</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span><span class="s">&quot;SELECT a, min(b) FROM example WHERE a &lt;= b GROUP BY a LIMIT 100&quot;</span><span class="p">).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the plan and collect the results as Vec&lt;RecordBatch&gt;</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">results</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">RecordBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="n">collect</span><span class="p">().</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// Use the assert_batches_eq macro to compare the results with expected output</span>
<span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">assert_batches_eq</span><span class="o">!</span><span class="p">(</span><span class="fm">vec!</span><span class="p">[</span>
<span class="w"> </span><span class="s">&quot;+---+----------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| a | min(example.b) |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---+----------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 2 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---+----------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="o">&amp;</span><span class="n">results</span>
<span class="w"> </span><span class="p">);</span>
<span class="w"> </span><span class="nb">Ok</span><span class="p">(())</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="read-an-apache-parquet-file">
<h3>Read an Apache Parquet file<a class="headerlink" href="#read-an-apache-parquet-file" title="Link to this heading">#</a></h3>
<p>Similarly to CSV, you can register a Parquet file as a table using the <code class="docutils literal notranslate"><span class="pre">register_parquet</span></code> method.</p>
<div class="highlight-rust notranslate"><div class="highlight"><pre><span></span><span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">error</span><span class="p">::</span><span class="nb">Result</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">prelude</span><span class="p">::</span><span class="o">*</span><span class="p">;</span>
<span class="cp">#[tokio::main]</span>
<span class="k">async</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// create local session context</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">::</span><span class="n">new</span><span class="p">();</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">testdata</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">test_util</span><span class="p">::</span><span class="n">parquet_test_data</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// register parquet file with the execution context</span>
<span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">register_parquet</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;alltypes_plain&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="o">&amp;</span><span class="fm">format!</span><span class="p">(</span><span class="s">&quot;{testdata}/alltypes_plain.parquet&quot;</span><span class="p">),</span>
<span class="w"> </span><span class="n">ParquetReadOptions</span><span class="p">::</span><span class="n">default</span><span class="p">(),</span>
<span class="w"> </span><span class="p">)</span>
<span class="w"> </span><span class="p">.</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the query</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \</span>
<span class="s"> FROM alltypes_plain \</span>
<span class="s"> WHERE id &gt; 1 AND tinyint_col &lt; double_col&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the plan, and compare to the expected results</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">results</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="n">collect</span><span class="p">().</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">assert_batches_eq</span><span class="o">!</span><span class="p">(</span><span class="fm">vec!</span><span class="p">[</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| int_col | double_col | alltypes_plain.date_string_col |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 03/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 04/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 02/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="o">&amp;</span><span class="n">results</span>
<span class="w"> </span><span class="p">);</span>
<span class="w"> </span><span class="nb">Ok</span><span class="p">(())</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="read-an-apache-avro-file">
<h3>Read an Apache Avro file<a class="headerlink" href="#read-an-apache-avro-file" title="Link to this heading">#</a></h3>
<p>DataFusion can also read Avro files using the <code class="docutils literal notranslate"><span class="pre">register_avro</span></code> method.</p>
<div class="highlight-rust notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">arrow</span><span class="p">::</span><span class="n">util</span><span class="p">::</span><span class="n">pretty</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">error</span><span class="p">::</span><span class="nb">Result</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">prelude</span><span class="p">::</span><span class="o">*</span><span class="p">;</span>
<span class="cp">#[tokio::main]</span>
<span class="k">async</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">::</span><span class="n">new</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// find the path to the avro test files</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">testdata</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">test_util</span><span class="p">::</span><span class="n">arrow_test_data</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// register avro file with the execution context</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">avro_file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">&amp;</span><span class="fm">format!</span><span class="p">(</span><span class="s">&quot;{testdata}/avro/alltypes_plain.avro&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">register_avro</span><span class="p">(</span><span class="s">&quot;alltypes_plain&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">avro_file</span><span class="p">,</span><span class="w"> </span><span class="n">AvroReadOptions</span><span class="p">::</span><span class="n">default</span><span class="p">()).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the query</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \</span>
<span class="s"> FROM alltypes_plain \</span>
<span class="s"> WHERE id &gt; 1 AND tinyint_col &lt; double_col&quot;</span>
<span class="w"> </span><span class="p">).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the plan, and compare to the expected results</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">results</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="n">collect</span><span class="p">().</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">assert_batches_eq</span><span class="o">!</span><span class="p">(</span><span class="fm">vec!</span><span class="p">[</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| int_col | double_col | alltypes_plain.date_string_col |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 03/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 04/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 02/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="o">&amp;</span><span class="n">results</span>
<span class="w"> </span><span class="p">);</span>
<span class="w"> </span><span class="nb">Ok</span><span class="p">(())</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
</section>
<section id="reading-multiple-files-as-a-table">
<h2>Reading Multiple Files as a table<a class="headerlink" href="#reading-multiple-files-as-a-table" title="Link to this heading">#</a></h2>
<p>It is also possible to read multiple files as a single table. This is done
with the ListingTableProvider which takes a list of file paths and reads them
as a single table, matching schemas as appropriate</p>
<p>Coming Soon</p>
<div class="highlight-rust notranslate"><div class="highlight"><pre><span></span>
</pre></div>
</div>
</section>
<section id="using-create-external-table-to-register-data-sources-via-sql">
<h2>Using <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">EXTERNAL</span> <span class="pre">TABLE</span></code> to register data sources via SQL<a class="headerlink" href="#using-create-external-table-to-register-data-sources-via-sql" title="Link to this heading">#</a></h2>
<p>You can also register files using SQL using the <a class="reference internal" href="../user-guide/sql/ddl.html#create-external-table"><span class="std std-ref"><code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">EXTERNAL</span> <span class="pre">TABLE</span></code></span></a>
statement.</p>
<div class="highlight-rust notranslate"><div class="highlight"><pre><span></span><span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">error</span><span class="p">::</span><span class="nb">Result</span><span class="p">;</span>
<span class="k">use</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">prelude</span><span class="p">::</span><span class="o">*</span><span class="p">;</span>
<span class="cp">#[tokio::main]</span>
<span class="k">async</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// create local session context</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">ctx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SessionContext</span><span class="p">::</span><span class="n">new</span><span class="p">();</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">testdata</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">test_util</span><span class="p">::</span><span class="n">parquet_test_data</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// register parquet file using SQL</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">ddl</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="fm">format!</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;CREATE EXTERNAL TABLE alltypes_plain \</span>
<span class="s"> STORED AS PARQUET LOCATION &#39;{testdata}/alltypes_plain.parquet&#39;&quot;</span>
<span class="w"> </span><span class="p">);</span>
<span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span><span class="o">&amp;</span><span class="n">ddl</span><span class="p">).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the query referring to the alltypes_plain table we just registered</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="p">.</span><span class="n">sql</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \</span>
<span class="s"> FROM alltypes_plain \</span>
<span class="s"> WHERE id &gt; 1 AND tinyint_col &lt; double_col&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">).</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="c1">// execute the plan, and compare to the expected results</span>
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">results</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">df</span><span class="p">.</span><span class="n">collect</span><span class="p">().</span><span class="k">await</span><span class="o">?</span><span class="p">;</span>
<span class="w"> </span><span class="n">datafusion</span><span class="p">::</span><span class="n">assert_batches_eq</span><span class="o">!</span><span class="p">(</span><span class="fm">vec!</span><span class="p">[</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| int_col | double_col | alltypes_plain.date_string_col |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 03/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 04/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;| 1 | 10.1 | 02/01/09 |&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="s">&quot;+---------+------------+--------------------------------+&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="o">&amp;</span><span class="n">results</span>
<span class="w"> </span><span class="p">);</span>
<span class="w"> </span><span class="nb">Ok</span><span class="p">(())</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
</section>
</article>
<footer class="prev-next-footer d-print-none">
<div class="prev-next-area">
<a class="left-prev"
href="extensions.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Extensions List</p>
</div>
</a>
<a class="right-next"
href="extending-sql.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Extending SQL Syntax</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#registering-data-sources-using-sessioncontext-register">Registering Data Sources using <code class="docutils literal notranslate"><span class="pre">SessionContext::register*</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#read-a-csv-file">Read a CSV File</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#read-an-apache-parquet-file">Read an Apache Parquet file</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#read-an-apache-avro-file">Read an Apache Avro file</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reading-multiple-files-as-a-table">Reading Multiple Files as a table</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#using-create-external-table-to-register-data-sources-via-sql">Using <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">EXTERNAL</span> <span class="pre">TABLE</span></code> to register data sources via SQL</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow-datafusion/edit/main/docs/source/library-user-guide/using-the-sql-api.md">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
<div class="sidebar-secondary-item">
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
<li><a href="../_sources/library-user-guide/using-the-sql-api.md.txt"
rel="nofollow">Show Source</a></li>
</ul>
</div></div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<!-- Based on pydata_sphinx_theme/footer.html -->
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p>Apache DataFusion, Apache, the Apache feather logo, and the Apache DataFusion project logo</p>
<p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</div>
</footer>
</body>
</html>