| |
| <!DOCTYPE html> |
| |
| |
| <html lang="en" data-content_root="../" > |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>Introduction — Apache DataFusion documentation</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| // Mirror the stored "mode" and "theme" entries from localStorage onto the |
| // root element's data-* attributes; a missing entry becomes an empty string. |
| ["mode", "theme"].forEach(function (key) { |
| document.documentElement.dataset[key] = localStorage.getItem(key) || ""; |
| }); |
| </script> |
| <!-- |
| This gives us a CSS class that will be invisible only if JS is disabled. |
| --> |
| <noscript> |
| <style> |
| .pst-js-only { display: none !important; } |
| |
| </style> |
| </noscript> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" /> |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css?v=d08b24aa" /> |
| |
| <!-- So that users can add custom icons --> |
| <script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script> |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" /> |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" /> |
| |
| <script src="../_static/documentation_options.js?v=5929fcd5"></script> |
| <script src="../_static/doctools.js?v=9bcbadda"></script> |
| <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = 'user-guide/introduction';</script> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Example Usage" href="example-usage.html" /> |
| <link rel="prev" title="Download" href="../download.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <meta name="docsearch:language" content="en"/> |
| <meta name="docsearch:version" content="" /> |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div> |
| |
| <div id="pst-scroll-pixel-helper"></div> |
| |
| <button type="button" class="btn rounded-pill" id="pst-back-to-top"> |
| <i class="fa-solid fa-arrow-up"></i>Back to top</button> |
| |
| |
| <dialog id="pst-search-dialog"> |
| |
| <form class="bd-search d-flex align-items-center" |
| action="../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form> |
| </dialog> |
| |
| <div class="pst-async-banner-revealer d-none"> |
| <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside> |
| </div> |
| |
| |
| <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none"> |
| <div class="bd-header__inner bd-page-width"> |
| <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation"> |
| <span class="fa-solid fa-bars"></span> |
| </button> |
| |
| |
| <div class="col-lg-3 navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| |
| |
| |
| <a class="navbar-brand logo" href="../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="../_static/original.svg" class="logo__image only-light" alt="Apache DataFusion documentation - Home"/> |
| <img src="../_static/original_dark.svg" class="logo__image only-dark pst-js-only" alt="Apache DataFusion documentation - Home"/> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| <div class="col-lg-9 navbar-header-items"> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| |
| <button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <span class="search-button__default-text">Search</span> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span> |
| </button> |
| </div> |
| |
| |
| |
| <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page"> |
| <span class="fa-solid fa-outdent"></span> |
| </button> |
| |
| </div> |
| |
| </header> |
| |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| |
| |
| <dialog id="pst-primary-sidebar-modal"></dialog> |
| <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"> |
| |
| <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i> |
| <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i> |
| <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i> |
| </button></div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="sidebar-primary-items__start sidebar-primary__section"> |
| <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| |
| <div class="bd-toc-item active"> |
| |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">ASF Links</span></p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference external" href="https://apache.org">Apache Software Foundation</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://www.apache.org/licenses/">License</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://www.apache.org/foundation/sponsorship.html">Donate</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://www.apache.org/security/">Security</a></li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">Links</span></p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference external" href="https://github.com/apache/datafusion">GitHub and Issue Tracker</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://crates.io/crates/datafusion">crates.io</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">API Docs</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/blog/">Blog</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">Code of conduct</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../download.html">Download</a></li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">User Guide</span></p> |
| <ul class="current nav bd-sidenav"> |
| <li class="toctree-l1 current active"><a class="current reference internal" href="#">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="example-usage.html">Example Usage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="features.html">Features</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="concepts-readings-events.html">Concepts, Readings, Events</a></li> |
| |
| <li class="toctree-l1"><a class="reference internal" href="crate-configuration.html">Crate Configuration</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="cli/index.html">DataFusion CLI</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="cli/overview.html">Overview</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="cli/installation.html">Installation</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="cli/usage.html">Usage</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="cli/datasources.html">Local Files / Directories</a></li> |
| |
| |
| |
| |
| <li class="toctree-l2"><a class="reference internal" href="cli/functions.html">CLI Specific Functions</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="dataframe.html">DataFrame API</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="arrow-introduction.html">Gentle Arrow Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="expressions.html">Expression API</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="sql/index.html">SQL Reference</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="sql/data_types.html">Data Types</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/select.html">SELECT syntax</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/subqueries.html">Subqueries</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/ddl.html">DDL</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/dml.html">DML</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/explain.html">EXPLAIN</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/information_schema.html">Information Schema</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/operators.html">Operators and Literals</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/aggregate_functions.html">Aggregate Functions</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/window_functions.html">Window Functions</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/scalar_functions.html">Scalar Functions</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/special_functions.html">Special Functions</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="sql/format_options.html">Format Options</a></li> |
| |
| <li class="toctree-l2"><a class="reference internal" href="sql/prepared_statements.html">Prepared Statements</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="configs.html">Configuration Settings</a></li> |
| |
| |
| |
| <li class="toctree-l1"><a class="reference internal" href="explain-usage.html">Reading Explain Plans</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li> |
| |
| </ul> |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">Library User Guide</span></p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/index.html">Introduction</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/upgrading.html">Upgrade Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/extensions.html">Extensions List</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/using-the-sql-api.html">Using the SQL API</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/extending-sql.html">Extending SQL Syntax</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/working-with-exprs.html">Working with <code class="docutils literal notranslate"><span class="pre">Expr</span></code>s</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/using-the-dataframe-api.html">Using the DataFrame API</a></li> |
| |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/building-logical-plans.html">Building Logical Plans</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/catalogs.html">Catalogs, Schemas, and Tables</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="../library-user-guide/functions/index.html">Functions</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="../library-user-guide/functions/adding-udfs.html">Adding User Defined Functions: Scalar/Window/Aggregate/Table Functions</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../library-user-guide/functions/spark.html">Spark Compatible Functions</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/custom-table-providers.html">Custom Table Provider</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/table-constraints.html">Table Constraint Enforcement</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/extending-operators.html">Extending Operators</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/profiling.html">Profiling Cookbook</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../library-user-guide/query-optimizer.html">Query Optimizer</a></li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">Contributor Guide</span></p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/index.html">Introduction</a></li> |
| |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/communication.html">Community Communication</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/development_environment.html">Development Environment</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/architecture.html">Architecture</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/architecture/dependency-graph.html">Workspace Dependency Graph</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/testing.html">Testing</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/api-health.html">API health policy</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/howtos.html">HOWTOs</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/roadmap.html">Roadmap and Improvement Proposals</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/governance.html">Governance</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../contributor-guide/inviting.html">Inviting New Committers and PMC Members</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="../contributor-guide/specification/index.html">Specifications</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="../contributor-guide/specification/invariants.html">Invariants</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../contributor-guide/specification/output-field-name-semantic.html">Output field name semantics</a></li> |
| </ul> |
| </details></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="../contributor-guide/gsoc/index.html">Google Summer of Code (GSOC)</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul> |
| <li class="toctree-l2"><a class="reference internal" href="../contributor-guide/gsoc/gsoc_application_guidelines_2025.html">GSoC Application Guidelines (2025)</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../contributor-guide/gsoc/gsoc_project_ideas_2025.html">GSoC Project Ideas (2025)</a></li> |
| </ul> |
| </details></li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"><span class="caption-text">DataFusion Subprojects</span></p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/ballista/">DataFusion Ballista</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/comet/">DataFusion Comet</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://datafusion.apache.org/python/">DataFusion Python</a></li> |
| </ul> |
| |
| |
| </div> |
| </nav></div> |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| <div class="sidebar-primary-item"> |
| <div id="ethical-ad-placement" |
| class="flat" |
| data-ea-publisher="readthedocs" |
| data-ea-type="readthedocs-sidebar" |
| data-ea-manual="true"> |
| </div></div> |
| </div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main" role="main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article d-print-none"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| <nav aria-label="Breadcrumb" class="d-print-none"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home"></i> |
| </a> |
| </li> |
| <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">Introduction</span></li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article"> |
| |
| <!--- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| <section id="introduction"> |
| <h1>Introduction<a class="headerlink" href="#introduction" title="Link to this heading">#</a></h1> |
| <p>DataFusion is a very fast, extensible query engine for building |
| high-quality data-centric systems in <a class="reference external" href="https://www.rust-lang.org/">Rust</a>, |
| using the <a class="reference external" href="https://arrow.apache.org">Apache Arrow</a> in-memory format. |
| DataFusion originated as part of the <a class="reference external" href="https://arrow.apache.org/">Apache Arrow</a> |
| project.</p> |
| <p>DataFusion offers SQL and DataFrame APIs, excellent <a class="reference external" href="https://benchmark.clickhouse.com/">performance</a>, built-in support for CSV, Parquet, JSON, and Avro, <a class="reference external" href="https://github.com/apache/datafusion-python">Python bindings</a>, extensive customization, a great community, and more.</p> |
| <section id="project-goals"> |
| <h2>Project Goals<a class="headerlink" href="#project-goals" title="Link to this heading">#</a></h2> |
| <p>DataFusion aims to be the query engine of choice for new, fast |
| data-centric systems such as databases, dataframe libraries, machine |
| learning and streaming applications by leveraging the unique features |
| of <a class="reference external" href="https://www.rust-lang.org/">Rust</a> and <a class="reference external" href="https://arrow.apache.org/">Apache |
| Arrow</a>.</p> |
| </section> |
| <section id="features"> |
| <h2>Features<a class="headerlink" href="#features" title="Link to this heading">#</a></h2> |
| <ul class="simple"> |
| <li><p>Feature-rich <a class="reference external" href="https://datafusion.apache.org/user-guide/sql/index.html">SQL support</a> and <a class="reference external" href="https://datafusion.apache.org/user-guide/dataframe.html">DataFrame API</a></p></li> |
| <li><p>Blazingly fast, vectorized, multithreaded, streaming execution engine.</p></li> |
| <li><p>Native support for Parquet, CSV, JSON, and Avro file formats. Support |
| for custom file formats and non-file datasources via the <code class="docutils literal notranslate"><span class="pre">TableProvider</span></code> trait.</p></li> |
| <li><p>Many extension points: user defined scalar/aggregate/window functions, DataSources, SQL, |
| other query languages, custom plan and execution nodes, optimizer passes, and more.</p></li> |
| <li><p>Streaming, asynchronous IO directly from popular object stores, including AWS S3, |
| Azure Blob Storage, and Google Cloud Storage (Other storage systems are supported via the |
| <code class="docutils literal notranslate"><span class="pre">ObjectStore</span></code> trait).</p></li> |
| <li><p><a class="reference external" href="https://docs.rs/datafusion/latest">Excellent Documentation</a> and a |
| <a class="reference external" href="https://datafusion.apache.org/contributor-guide/communication.html">welcoming community</a>.</p></li> |
| <li><p>A state of the art query optimizer with expression coercion and |
| simplification, projection and filter pushdown, sort and distribution |
| aware optimizations, automatic join reordering, and more.</p></li> |
| <li><p>Permissive Apache 2.0 License, predictable and well understood |
| <a class="reference external" href="https://www.apache.org/">Apache Software Foundation</a> governance.</p></li> |
| <li><p>Implementation in <a class="reference external" href="https://www.rust-lang.org/">Rust</a>, a modern |
| system language with development productivity similar to Java or |
| Golang, the performance of C++, and <a class="reference external" href="https://insights.stackoverflow.com/survey/2021#technology-most-loved-dreaded-and-wanted">loved by programmers |
| everywhere</a>.</p></li> |
| <li><p>Support for <a class="reference external" href="https://substrait.io/">Substrait</a> query plans, to |
| easily pass plans across language and system boundaries.</p></li> |
| </ul> |
| </section> |
| <section id="use-cases"> |
| <h2>Use Cases<a class="headerlink" href="#use-cases" title="Link to this heading">#</a></h2> |
| <p>DataFusion can be used without modification as an embedded SQL |
| engine or can be customized and used as a foundation for |
| building new systems.</p> |
| <p>While most current use cases are “analytic” (throughput-oriented), some |
| components of DataFusion, such as the plan representations, are |
| suitable for “streaming” and “transaction” style systems (low |
| latency).</p> |
| <p>Here are some example systems built using DataFusion:</p> |
| <ul class="simple"> |
| <li><p>Specialized Analytical Database systems such as <a class="reference external" href="https://github.com/apache/incubator-horaedb">HoraeDB</a> and more general Apache Spark-like systems such as <a class="reference external" href="https://github.com/apache/datafusion-ballista">Ballista</a></p></li> |
| <li><p>New query language engines such as <a class="reference external" href="https://github.com/prql/prql-query">prql-query</a> and accelerators such as <a class="reference external" href="https://vegafusion.io/">VegaFusion</a></p></li> |
| <li><p>Research platform for new Database Systems, such as <a class="reference external" href="https://github.com/flock-lab/flock">Flock</a></p></li> |
| <li><p>SQL support to another library, such as <a class="reference external" href="https://vortex.dev/">Vortex</a></p></li> |
| <li><p>Streaming data platforms such as <a class="reference external" href="https://synnada.ai/">Synnada</a></p></li> |
| <li><p>Tools for reading / sorting / transcoding Parquet, CSV, AVRO, and JSON files such as <a class="reference external" href="https://github.com/timvw/qv">qv</a></p></li> |
| <li><p>Native Spark runtime replacement such as <a class="reference external" href="https://github.com/apache/auron">Auron</a></p></li> |
| <li><p>Distributed data cache to boost GPU utilization of AI workloads with <a class="reference external" href="https://www.kubeflow.org/docs/components/trainer/user-guides/data-cache/">Kubeflow Trainer</a></p></li> |
| </ul> |
| <p>By using DataFusion, projects are freed to focus on their specific |
| features, and avoid reimplementing general (but still necessary) |
| features such as an expression representation, standard optimizations, |
| parallelized streaming execution plans, file format support, etc.</p> |
| </section> |
| <section id="known-users"> |
| <h2>Known Users<a class="headerlink" href="#known-users" title="Link to this heading">#</a></h2> |
| <p>Here are some active projects using DataFusion:</p> |
| <!-- "Active" means github repositories that had at least one commit in the last 6 months --> |
| <ul class="simple"> |
| <li><p><a class="reference external" href="https://github.com/ArroyoSystems/arroyo">Arroyo</a> Distributed stream processing engine in Rust</p></li> |
| <li><p><a class="reference external" href="https://github.com/arkflow-rs/arkflow">ArkFlow</a> High-performance Rust stream processing engine</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/auron">Auron</a> The Auron accelerator for big data engines (e.g., Spark, Flink) leverages native vectorized execution to accelerate query processing</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/datafusion-ballista">Ballista</a> Distributed SQL Query Engine</p></li> |
| <li><p><a class="reference external" href="https://github.com/cnosdb/cnosdb">CnosDB</a> Open Source Distributed Time Series Database</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/datafusion-comet">Comet</a> Apache Spark native query execution plugin</p></li> |
| <li><p><a class="reference external" href="https://github.com/cube-js/cube.js/tree/master/rust">Cube Store</a> Cube’s universal semantic layer platform is the next evolution of OLAP technology for AI, BI, spreadsheets, and embedded analytics</p></li> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-dft">datafusion-dft</a> Batteries included CLI, TUI, and server implementations for DataFusion.</p></li> |
| <li><p><a class="reference external" href="https://github.com/dbt-labs/dbt-fusion">dbt Fusion engine</a> The dbt Fusion engine, written in Rust, designed for speed and correctness with a native SQL understanding across DWH SQL dialects.</p></li> |
| <li><p><a class="reference external" href="https://github.com/delta-io/delta-rs">delta-rs</a> Native Rust implementation of Delta Lake</p></li> |
| <li><p><a class="reference external" href="https://www.enterprisedb.com/products/analytics">EDB Postgres Lakehouse</a> built with <a class="reference external" href="https://github.com/splitgraph/seafowl">Seafowl</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/feldera/feldera">Feldera</a> Fast query engine for incremental computation</p></li> |
| <li><p><a class="reference external" href="https://funnel.io/">Funnel</a> Data Platform powering Marketing Intelligence applications.</p></li> |
| <li><p><a class="reference external" href="https://github.com/GlareDB/glaredb">GlareDB</a> Fast SQL database for querying and analyzing distributed data.</p></li> |
| <li><p><a class="reference external" href="https://github.com/GreptimeTeam/greptimedb">GreptimeDB</a> Open Source & Cloud Native Distributed Time Series Database</p></li> |
| <li><p><a class="reference external" href="https://hiop.io">hiop</a> Serverless Data Logistic Platform</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/incubator-horaedb">HoraeDB</a> Distributed Time-Series Database</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/iceberg-rust">Iceberg-rust</a> Rust implementation of Apache Iceberg</p></li> |
| <li><p><a class="reference external" href="https://github.com/influxdata/influxdb">InfluxDB</a> Time Series Database</p></li> |
| <li><p><a class="reference external" href="https://github.com/kamu-data/kamu-cli">Kamu</a> Planet-scale streaming data pipeline</p></li> |
| <li><p><a class="reference external" href="https://github.com/kubeflow/trainer">Kubeflow Trainer</a> Kubernetes-native project designed for |
| scalable LLM fine-tuning and distributed AI model training.</p></li> |
| <li><p><a class="reference external" href="https://github.com/lakesoul-io/LakeSoul">LakeSoul</a> Open source LakeHouse framework with native IO in Rust.</p></li> |
| <li><p><a class="reference external" href="https://github.com/lancedb/lance">Lance</a> Modern columnar data format for ML</p></li> |
| <li><p><a class="reference external" href="https://github.com/openobserve/openobserve">OpenObserve</a> Distributed cloud native observability platform</p></li> |
| <li><p><a class="reference external" href="https://github.com/paradedb/paradedb">ParadeDB</a> PostgreSQL for Search &amp; Analytics</p></li> |
| <li><p><a class="reference external" href="https://github.com/parseablehq/parseable">Parseable</a> Log storage and observability platform</p></li> |
| <li><p><a class="reference external" href="https://polygon.io/">Polygon.io</a> Stock Market API</p></li> |
| <li><p><a class="reference external" href="https://github.com/timvw/qv">qv</a> Quickly view your data</p></li> |
| <li><p><a class="reference external" href="https://blog.cloudflare.com/r2-sql-deep-dive/">R2 Query Engine</a> Cloudflare’s distributed engine for querying data in Iceberg Catalogs</p></li> |
| <li><p><a class="reference external" href="https://rerun.io/">rerun.io</a> Visualize and query robotics logs and transform them into training data.</p></li> |
| <li><p><a class="reference external" href="https://github.com/restatedev">Restate</a> Easily build resilient applications using distributed durable async/await</p></li> |
| <li><p><a class="reference external" href="https://github.com/roapi/roapi">ROAPI</a> Create full-fledged APIs for slowly moving datasets without writing a single line of code</p></li> |
| <li><p><a class="reference external" href="https://github.com/lakehq/sail">Sail</a> Unifying stream, batch and AI workloads with Apache Spark compatibility</p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/sedona-db">SedonaDB</a> A single-node analytical database engine with geospatial as a first-class citizen</p></li> |
| <li><p><a class="reference external" href="https://github.com/gchq/sleeper">Sleeper</a> Serverless, cloud-native, log-structured merge tree based, scalable key-value store</p></li> |
| <li><p><a class="reference external" href="https://github.com/spiceai/spiceai">Spice.ai</a> Building blocks for data-driven AI applications</p></li> |
| <li><p><a class="reference external" href="https://synnada.ai/">Synnada</a> Streaming-first framework for data products</p></li> |
| <li><p><a class="reference external" href="https://vegafusion.io/">VegaFusion</a> Server-side acceleration for the <a class="reference external" href="https://vega.github.io/">Vega</a> visualization grammar</p></li> |
| <li><p><a class="reference external" href="https://vortex.dev/">Vortex</a> An extensible, state of the art columnar file format</p></li> |
| <li><p><a class="reference external" href="https://telemetry.sh/">Telemetry</a> Structured logging made easy</p></li> |
| <li><p><a class="reference external" href="https://github.com/xorq-labs/xorq/">Xorq</a> Xorq is a multi-engine batch transformation framework built on Ibis, DataFusion and Arrow</p></li> |
| </ul> |
| <p>Here are some less active projects that used DataFusion:</p> |
| <ul class="simple"> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/bdt">bdt</a> Boring Data Tool</p></li> |
| <li><p><a class="reference external" href="https://github.com/cloudfuse-io/buzz-rust">Cloudfuse Buzz</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/dask-contrib/dask-sql">Dask SQL</a> Distributed SQL query engine in Python</p></li> |
| <li><p><a class="reference external" href="https://github.com/wheretrue/exon">Exon</a> Analysis toolkit for life-science applications</p></li> |
| <li><p><a class="reference external" href="https://github.com/flock-lab/flock">Flock</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/tensorbase/tensorbase">Tensorbase</a></p></li> |
| </ul> |
| <p>If you know of another project, please submit a PR to add a link!</p> |
| </section> |
| <section id="integrations-and-extensions"> |
| <h2>Integrations and Extensions<a class="headerlink" href="#integrations-and-extensions" title="Link to this heading">#</a></h2> |
| <p>There are a number of community projects that extend DataFusion or |
| provide integrations with other systems, some of which are described below:</p> |
| <section id="language-bindings"> |
| <h3>Language Bindings<a class="headerlink" href="#language-bindings" title="Link to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-c">datafusion-c</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/apache/datafusion-python">datafusion-python</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-ruby">datafusion-ruby</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-java">datafusion-java</a></p></li> |
| </ul> |
| </section> |
| <section id="integrations"> |
| <h3>Integrations<a class="headerlink" href="#integrations" title="Link to this heading">#</a></h3> |
| <ul class="simple"> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-bigtable">datafusion-bigtable</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-catalogprovider-glue">datafusion-catalogprovider-glue</a></p></li> |
| <li><p><a class="reference external" href="https://github.com/datafusion-contrib/datafusion-federation">datafusion-federation</a></p></li> |
| </ul> |
| </section> |
| </section> |
| <section id="why-datafusion"> |
| <h2>Why DataFusion?<a class="headerlink" href="#why-datafusion" title="Link to this heading">#</a></h2> |
| <ul class="simple"> |
| <li><p><em>High Performance</em>: Leveraging Rust and Arrow’s memory model, DataFusion is very fast.</p></li> |
| <li><p><em>Easy to Connect</em>: Being part of the Apache Arrow ecosystem (Arrow, Parquet, and Flight), DataFusion works well with the rest of the big data ecosystem.</p></li> |
| <li><p><em>Easy to Embed</em>: Allowing extension at almost any point in its design, and published regularly as a crate on <a class="reference external" href="https://crates.io">crates.io</a>, DataFusion can be integrated and tailored for your specific use case.</p></li> |
| <li><p><em>High Quality</em>: Extensively tested, both by itself and with the rest of the Arrow ecosystem, DataFusion can be and is used as the foundation for production systems.</p></li> |
| </ul> |
| </section> |
| <section id="rust-version-compatibility-policy"> |
| <h2>Rust Version Compatibility Policy<a class="headerlink" href="#rust-version-compatibility-policy" title="Link to this heading">#</a></h2> |
| <p>The Rust toolchain releases are tracked at <a class="reference external" href="https://releases.rs">Rust Versions</a> and follow |
| <a class="reference external" href="https://semver.org/">semantic versioning</a>. A Rust toolchain release can be identified |
| by a version string like <code class="docutils literal notranslate"><span class="pre">1.80.0</span></code>, or more generally <code class="docutils literal notranslate"><span class="pre">major.minor.patch</span></code>.</p> |
| <p>DataFusion supports the last 4 stable Rust minor versions released and any such versions released within the last 4 months.</p> |
| <p>For example, given the releases <code class="docutils literal notranslate"><span class="pre">1.78.0</span></code>, <code class="docutils literal notranslate"><span class="pre">1.79.0</span></code>, <code class="docutils literal notranslate"><span class="pre">1.80.0</span></code>, <code class="docutils literal notranslate"><span class="pre">1.80.1</span></code> and <code class="docutils literal notranslate"><span class="pre">1.81.0</span></code> DataFusion will support 1.78.0, which is 3 minor versions prior to the most recent minor version <code class="docutils literal notranslate"><span class="pre">1.81</span></code>.</p> |
| <p>Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes; this takes precedence over the other policies.</p> |
| <p>DataFusion enforces MSRV policy using an <a class="reference external" href="https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&amp;type=code">MSRV CI Check</a>.</p> |
| </section> |
| </section> |
| |
| |
| </article> |
| |
| |
| |
| |
| |
| <footer class="prev-next-footer d-print-none"> |
| |
| <div class="prev-next-area"> |
| <a class="left-prev" |
| href="../download.html" |
| title="previous page"> |
| <i class="fa-solid fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Download</p> |
| </div> |
| </a> |
| <a class="right-next" |
| href="example-usage.html" |
| title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Example Usage</p> |
| </div> |
| <i class="fa-solid fa-angle-right"></i> |
| </a> |
| </div> |
| </footer> |
| |
| </div> |
| |
| |
| |
| <dialog id="pst-secondary-sidebar-modal"></dialog> |
| <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner"> |
| |
| |
| <div class="sidebar-secondary-item"> |
| <div |
| id="pst-page-navigation-heading-2" |
| class="page-toc tocsection onthispage"> |
| <i class="fa-solid fa-list"></i> On this page |
| </div> |
| <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#project-goals">Project Goals</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#features">Features</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#use-cases">Use Cases</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#known-users">Known Users</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#integrations-and-extensions">Integrations and Extensions</a><ul class="nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#language-bindings">Language Bindings</a></li> |
| <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#integrations">Integrations</a></li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#why-datafusion">Why DataFusion?</a></li> |
| <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rust-version-compatibility-policy">Rust Version Compatibility Policy</a></li> |
| </ul> |
| </nav></div> |
| |
| <div class="sidebar-secondary-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/datafusion/edit/main/docs/source/user-guide/introduction.md"> |
| <i class="fa-solid fa-pencil"></i> |
| |
| |
| |
| Edit on GitHub |
| |
| |
| </a> |
| </div> |
| </div> |
| |
| <div class="sidebar-secondary-item"> |
| <div role="note" aria-label="source link"> |
| <h3>This Page</h3> |
| <ul class="this-page-menu"> |
| <li><a href="../_sources/user-guide/introduction.md.txt" |
| rel="nofollow">Show Source</a></li> |
| </ul> |
| </div></div> |
| |
| </div></div> |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script> |
| <script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script> |
| |
| <!-- Based on pydata_sphinx_theme/footer.html --> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p>Apache DataFusion, Apache, the Apache feather logo, and the Apache DataFusion project logo</p> |
| <p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p> |
| </div> |
| </div> |
| </footer> |
| |
| |
| </body> |
| </html> |