blob: 56ae4eaaa1cba5414b55d43e32c4c3d1c6aa77a9 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Joins &#8212; Apache Arrow DataFusion documentation</title>
<link href="../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/pydata-sphinx-theme.css?v=1140d252" />
<link rel="stylesheet" type="text/css" href="../../_static/graphviz.css?v=4ae1632d" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css?v=dca7052a" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script src="../../_static/documentation_options.js?v=8a448e45"></script>
<script src="../../_static/doctools.js?v=9bcbadda"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Functions" href="functions.html" />
<link rel="prev" title="Expressions" href="expressions.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items">
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/images/2x_bgwhite_original.png" class="logo" alt="logo">
</a>
<form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
LINKS
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python">
GitHub and Issue Tracker
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">
Rust's API Docs
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">
Code of conduct
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples">
Examples
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
USER GUIDE
</span>
</p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../basics.html">
Concepts
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../data-sources.html">
Data Sources
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../dataframe/index.html">
DataFrames
</a>
<input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/>
<label for="toctree-checkbox-1">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../dataframe/rendering.html">
HTML Rendering in Jupyter
</a>
</li>
</ul>
</li>
<li class="toctree-l1 current active has-children">
<a class="reference internal" href="index.html">
Common Operations
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/>
<label for="toctree-checkbox-2">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l2">
<a class="reference internal" href="views.html">
Registering Views
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="basic-info.html">
Basic Operations
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="select-and-filter.html">
Column Selections
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="expressions.html">
Expressions
</a>
</li>
<li class="toctree-l2 current active">
<a class="current reference internal" href="#">
Joins
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="functions.html">
Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="aggregations.html">
Aggregation
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="windows.html">
Window Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="udf-and-udfa.html">
User-Defined Functions
</a>
</li>
</ul>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../io/index.html">
IO
</a>
<input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/>
<label for="toctree-checkbox-3">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../io/arrow.html">
Arrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/avro.html">
Avro
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/csv.html">
CSV
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/json.html">
JSON
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/parquet.html">
Parquet
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/table_provider.html">
Custom Table Provider
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../configuration.html">
Configuration
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../sql.html">
SQL
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
CONTRIBUTOR GUIDE
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../contributor-guide/introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../contributor-guide/ffi.html">
Python Extensions
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
API
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../autoapi/index.html">
API Reference
</a>
<input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/>
<label for="toctree-checkbox-4">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../../autoapi/datafusion/index.html">
datafusion
</a>
<input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/>
<label for="toctree-checkbox-5">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/catalog/index.html">
datafusion.catalog
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/context/index.html">
datafusion.context
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html">
datafusion.dataframe
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/dataframe_formatter/index.html">
datafusion.dataframe_formatter
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/expr/index.html">
datafusion.expr
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/functions/index.html">
datafusion.functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/html_formatter/index.html">
datafusion.html_formatter
</a>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../autoapi/datafusion/input/index.html">
datafusion.input
</a>
<input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/>
<label for="toctree-checkbox-6">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../autoapi/datafusion/input/base/index.html">
datafusion.input.base
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../autoapi/datafusion/input/location/index.html">
datafusion.input.location
</a>
</li>
</ul>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/io/index.html">
datafusion.io
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/object_store/index.html">
datafusion.object_store
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/plan/index.html">
datafusion.plan
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/record_batch/index.html">
datafusion.record_batch
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/substrait/index.html">
datafusion.substrait
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/unparser/index.html">
datafusion.unparser
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html">
datafusion.user_defined
</a>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#inner-join">
Inner Join
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#left-join">
Left Join
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#full-join">
Full Join
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#left-semi-join">
Left Semi Join
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#left-anti-join">
Left Anti Join
</a>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<section id="joins">
<h1>Joins<a class="headerlink" href="#joins" title="Link to this heading"></a></h1>
<p>DataFusion supports the following join variants via the method <a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html#datafusion.dataframe.DataFrame.join" title="datafusion.dataframe.DataFrame.join"><code class="xref py py-func docutils literal notranslate"><span class="pre">join()</span></code></a>:</p>
<ul class="simple">
<li><p>Inner Join</p></li>
<li><p>Left Join</p></li>
<li><p>Right Join</p></li>
<li><p>Full Join</p></li>
<li><p>Left Semi Join</p></li>
<li><p>Left Anti Join</p></li>
</ul>
<p>For the examples in this section we’ll use the following two DataFrames:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">1</span><span class="p">]:</span> <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">SessionContext</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">2</span><span class="p">]:</span> <span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">3</span><span class="p">]:</span> <span class="n">left</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">from_pydict</span><span class="p">(</span>
<span class="o">...</span><span class="p">:</span> <span class="p">{</span>
<span class="o">...</span><span class="p">:</span> <span class="s2">&quot;customer_id&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="o">...</span><span class="p">:</span> <span class="s2">&quot;customer&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Alice&quot;</span><span class="p">,</span> <span class="s2">&quot;Bob&quot;</span><span class="p">,</span> <span class="s2">&quot;Charlie&quot;</span><span class="p">],</span>
<span class="o">...</span><span class="p">:</span> <span class="p">}</span>
<span class="o">...</span><span class="p">:</span> <span class="p">)</span>
<span class="o">...</span><span class="p">:</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">4</span><span class="p">]:</span> <span class="n">right</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">([</span>
<span class="o">...</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;CityCabs&quot;</span><span class="p">},</span>
<span class="o">...</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;MetroRide&quot;</span><span class="p">},</span>
<span class="o">...</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;UrbanGo&quot;</span><span class="p">},</span>
<span class="o">...</span><span class="p">:</span> <span class="p">])</span>
<span class="o">...</span><span class="p">:</span>
</pre></div>
</div>
<section id="inner-join">
<h2>Inner Join<a class="headerlink" href="#inner-join" title="Link to this heading"></a></h2>
<p>When using an inner join, only rows whose join column values are present in both DataFrames
will be included in the resulting DataFrame.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">5</span><span class="p">]:</span> <span class="n">left</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s2">&quot;customer_id&quot;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;inner&quot;</span><span class="p">)</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">5</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="n">customer_id</span> <span class="o">|</span> <span class="n">customer</span> <span class="o">|</span> <span class="nb">id</span> <span class="o">|</span> <span class="n">name</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Alice</span> <span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">CityCabs</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">Bob</span> <span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">MetroRide</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
</pre></div>
</div>
<p>The parameters <code class="docutils literal notranslate"><span class="pre">left_on</span></code> and <code class="docutils literal notranslate"><span class="pre">right_on</span></code> specify the columns from the left DataFrame and the right DataFrame, respectively, that contain the values
that should match.</p>
</section>
<section id="left-join">
<h2>Left Join<a class="headerlink" href="#left-join" title="Link to this heading"></a></h2>
<p>A left join combines rows from two DataFrames using the key columns. It returns all rows from the left DataFrame and
matching rows from the right DataFrame. If there’s no match in the right DataFrame, it returns null
values for the corresponding columns.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">6</span><span class="p">]:</span> <span class="n">left</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s2">&quot;customer_id&quot;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;left&quot;</span><span class="p">)</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">6</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="n">customer_id</span> <span class="o">|</span> <span class="n">customer</span> <span class="o">|</span> <span class="nb">id</span> <span class="o">|</span> <span class="n">name</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Alice</span> <span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">CityCabs</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">Bob</span> <span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">MetroRide</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="n">Charlie</span> <span class="o">|</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
</pre></div>
</div>
</section>
<section id="full-join">
<h2>Full Join<a class="headerlink" href="#full-join" title="Link to this heading"></a></h2>
<p>A full join merges rows from two tables based on a related column, returning all rows from both tables, even if there
is no match. Unmatched rows will have null values in the columns that come from the other table.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">7</span><span class="p">]:</span> <span class="n">left</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s2">&quot;customer_id&quot;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;full&quot;</span><span class="p">)</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">7</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="n">customer_id</span> <span class="o">|</span> <span class="n">customer</span> <span class="o">|</span> <span class="nb">id</span> <span class="o">|</span> <span class="n">name</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
<span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Alice</span> <span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">CityCabs</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">Bob</span> <span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">MetroRide</span> <span class="o">|</span>
<span class="o">|</span> <span class="o">|</span> <span class="o">|</span> <span class="mi">5</span> <span class="o">|</span> <span class="n">UrbanGo</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="n">Charlie</span> <span class="o">|</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">+-------------+----------+----+-----------+</span>
</pre></div>
</div>
</section>
<section id="left-semi-join">
<h2>Left Semi Join<a class="headerlink" href="#left-semi-join" title="Link to this heading"></a></h2>
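<p>A left semi join returns only the rows from the left table that have at least one match in the right table.
Each matching left row appears once, even if it matches multiple rows in the right table, and no columns from the right table are included.</p>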
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">8</span><span class="p">]:</span> <span class="n">left</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s2">&quot;customer_id&quot;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;semi&quot;</span><span class="p">)</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">8</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+-------------+----------+</span>
<span class="o">|</span> <span class="n">customer_id</span> <span class="o">|</span> <span class="n">customer</span> <span class="o">|</span>
<span class="o">+-------------+----------+</span>
<span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Alice</span> <span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">Bob</span> <span class="o">|</span>
<span class="o">+-------------+----------+</span>
</pre></div>
</div>
</section>
<section id="left-anti-join">
<h2>Left Anti Join<a class="headerlink" href="#left-anti-join" title="Link to this heading"></a></h2>
<p>A left anti join shows all rows from the left table without any matching rows in the right table,
based on the specified matching columns. It excludes rows from the left table that have at least one matching row in
the right table.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">9</span><span class="p">]:</span> <span class="n">left</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s2">&quot;customer_id&quot;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;anti&quot;</span><span class="p">)</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">9</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+-------------+----------+</span>
<span class="o">|</span> <span class="n">customer_id</span> <span class="o">|</span> <span class="n">customer</span> <span class="o">|</span>
<span class="o">+-------------+----------+</span>
<span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="n">Charlie</span> <span class="o">|</span>
<span class="o">+-------------+----------+</span>
</pre></div>
</div>
</section>
</section>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
<a class='left-prev' id="prev-link" href="expressions.html" title="previous page">
<i class="fas fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Expressions</p>
</div>
</a>
<a class='right-next' id="next-link" href="functions.html" title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Functions</p>
</div>
<i class="fas fa-angle-right"></i>
</a>
</div>
</main>
</div>
</div>
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<!-- Based on pydata_sphinx_theme/footer.html -->
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright 2019-2024, Apache Software Foundation.<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 8.1.3.<br>
</p>
</div>
<div class="footer-item">
<p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo
are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</div>
</footer>
</body>
</html>