<!-- blob: b302fbec68c58d5f4ab9cb7250546b6acf7bddfb [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<!-- Single viewport declaration; the generated head previously repeated this tag three times. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>datafusion.dataframe &#8212; Apache Arrow DataFusion documentation</title>
<!-- Theme stylesheets and the bundled Font Awesome icon set. -->
<link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<!-- Icon fonts are preloaded; crossorigin is required on font preloads. -->
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<!-- NOTE(review): pydata-sphinx-theme.css is linked a second time below with a different query string. Left in place because dropping either copy could change cascade order relative to pygments.css; confirm before deduplicating. -->
<link rel="stylesheet" type="text/css" href="../../../_static/styles/pydata-sphinx-theme.css?v=1140d252" />
<link rel="stylesheet" type="text/css" href="../../../_static/graphviz.css?v=4ae1632d" />
<link rel="stylesheet" type="text/css" href="../../../_static/theme_overrides.css?v=dca7052a" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<!-- Scripts are loaded synchronously in this order; left unchanged. -->
<script src="../../../_static/documentation_options.js?v=8a448e45"></script>
<script src="../../../_static/doctools.js?v=9bcbadda"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<!-- Document relations: index, search, and the next/previous API pages. -->
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="datafusion.dataframe_formatter" href="../dataframe_formatter/index.html" />
<link rel="prev" title="datafusion.context" href="../context/index.html" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items">
<!-- Brand logo linking to the top-level index page. The alt text names the link destination because the image is the link's only content. -->
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/images/2x_bgwhite_original.png" class="logo" alt="Apache Arrow DataFusion documentation home">
</a>
<!-- Site search: sends the query as the "q" parameter to search.html using GET. role="search" exposes the form as a search landmark. -->
<form class="bd-search d-flex align-items-center" action="../../../search.html" method="get" role="search">
<!-- Decorative icon; hidden from assistive technology because the input below carries the accessible name. -->
<i class="icon fas fa-search" aria-hidden="true"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<!-- Sidebar group "LINKS": external project resources. The caption is a paragraph exposed as a level-2 heading through ARIA. -->
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
LINKS
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python">
GitHub and Issue Tracker
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">
Rust's API Docs
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">
Code of conduct
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples">
Examples
</a>
</li>
</ul>
<!-- Sidebar group "USER GUIDE": internal links into the user-guide pages. The caption is a paragraph exposed as a level-2 heading through ARIA. -->
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
USER GUIDE
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../../user-guide/introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../../user-guide/basics.html">
Concepts
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../../user-guide/data-sources.html">
Data Sources
</a>
</li>
<!-- Entry with children: a checkbox and its label (paired by for/id) precede the nested list. NOTE(review): presumably the theme CSS uses the checkbox state to show or hide the nested list; confirm in the theme stylesheet. -->
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../../user-guide/dataframe/index.html">
DataFrames
</a>
<input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/>
<label for="toctree-checkbox-1">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/dataframe/rendering.html">
HTML Rendering in Jupyter
</a>
</li>
</ul>
</li>
<!-- "Common Operations" subtree; same checkbox/label/nested-list structure as above. -->
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../../user-guide/common-operations/index.html">
Common Operations
</a>
<input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/>
<label for="toctree-checkbox-2">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/views.html">
Registering Views
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/basic-info.html">
Basic Operations
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/select-and-filter.html">
Column Selections
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/expressions.html">
Expressions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/joins.html">
Joins
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/functions.html">
Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/aggregations.html">
Aggregation
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/windows.html">
Window Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/common-operations/udf-and-udfa.html">
User-Defined Functions
</a>
</li>
</ul>
</li>
<!-- "IO" subtree; same checkbox/label/nested-list structure as above. -->
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../../user-guide/io/index.html">
IO
</a>
<input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/>
<label for="toctree-checkbox-3">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/arrow.html">
Arrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/avro.html">
Avro
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/csv.html">
CSV
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/json.html">
JSON
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/parquet.html">
Parquet
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../../../user-guide/io/table_provider.html">
Custom Table Provider
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../../user-guide/configuration.html">
Configuration
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../../user-guide/sql.html">
SQL
</a>
</li>
</ul>
<!-- Sidebar group "CONTRIBUTOR GUIDE": internal links into the contributor-guide pages. The caption is a paragraph exposed as a level-2 heading through ARIA. -->
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
CONTRIBUTOR GUIDE
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../../contributor-guide/introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../../contributor-guide/ffi.html">
Python Extensions
</a>
</li>
</ul>
<!-- Sidebar group "API": the API reference tree. The branch leading to this page carries the "current"/"active" classes and its checkboxes are pre-checked. -->
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
API
</span>
</p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1 current active has-children">
<a class="reference internal" href="../../index.html">
API Reference
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/>
<label for="toctree-checkbox-4">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l2 current active has-children">
<a class="reference internal" href="../index.html">
datafusion
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/>
<label for="toctree-checkbox-5">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l3">
<a class="reference internal" href="../catalog/index.html">
datafusion.catalog
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../context/index.html">
datafusion.context
</a>
</li>
<!-- Entry for the page being viewed. aria-current="page" exposes to assistive technology what the "current" class only shows visually. -->
<li class="toctree-l3 current active">
<a class="current reference internal" href="#" aria-current="page">
datafusion.dataframe
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../dataframe_formatter/index.html">
datafusion.dataframe_formatter
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../expr/index.html">
datafusion.expr
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../functions/index.html">
datafusion.functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../html_formatter/index.html">
datafusion.html_formatter
</a>
</li>
<!-- "datafusion.input" has child modules; its checkbox is not pre-checked. -->
<li class="toctree-l3 has-children">
<a class="reference internal" href="../input/index.html">
datafusion.input
</a>
<input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/>
<label for="toctree-checkbox-6">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../input/base/index.html">
datafusion.input.base
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../input/location/index.html">
datafusion.input.location
</a>
</li>
</ul>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../io/index.html">
datafusion.io
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../object_store/index.html">
datafusion.object_store
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../plan/index.html">
datafusion.plan
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../record_batch/index.html">
datafusion.record_batch
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../substrait/index.html">
datafusion.substrait
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../unparser/index.html">
datafusion.unparser
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../user_defined/index.html">
datafusion.user_defined
</a>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#classes">
Classes
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#module-contents">
Module Contents
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression">
<code class="docutils literal notranslate">
<span class="pre">
Compression
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.from_str">
<code class="docutils literal notranslate">
<span class="pre">
Compression.from_str()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.get_default_level">
<code class="docutils literal notranslate">
<span class="pre">
Compression.get_default_level()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.BROTLI">
<code class="docutils literal notranslate">
<span class="pre">
Compression.BROTLI
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.GZIP">
<code class="docutils literal notranslate">
<span class="pre">
Compression.GZIP
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4">
<code class="docutils literal notranslate">
<span class="pre">
Compression.LZ4
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4_RAW">
<code class="docutils literal notranslate">
<span class="pre">
Compression.LZ4_RAW
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.SNAPPY">
<code class="docutils literal notranslate">
<span class="pre">
Compression.SNAPPY
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.UNCOMPRESSED">
<code class="docutils literal notranslate">
<span class="pre">
Compression.UNCOMPRESSED
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.ZSTD">
<code class="docutils literal notranslate">
<span class="pre">
Compression.ZSTD
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__arrow_c_stream__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__getitem__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__getitem__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__repr__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__repr__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame._repr_html_">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame._repr_html_()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.aggregate">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.aggregate()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cache">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.cache()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cast">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.cast()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.collect()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect_partitioned">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.collect_partitioned()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.count">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.count()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.default_str_repr">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.default_str_repr()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.describe">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.describe()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.distinct">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.distinct()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.drop">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.drop()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.except_all">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.except_all()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execute_stream()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execute_stream_partitioned()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execution_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execution_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.explain">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.explain()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.fill_null">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.fill_null()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.filter">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.filter()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.head">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.head()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.intersect">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.intersect()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.into_view">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.into_view()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.join()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join_on">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.join_on()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.limit">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.limit()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.logical_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.logical_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.optimized_logical_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.optimized_logical_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.parse_sql_expr">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.parse_sql_expr()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.repartition()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition_by_hash">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.repartition_by_hash()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.schema">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.schema()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.select()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.select_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.show">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.show()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.sort">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.sort()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.tail">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.tail()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_arrow_table">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_arrow_table()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pandas">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pandas()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_polars">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_polars()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pydict">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pydict()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pylist">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pylist()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.transform">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.transform()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.union()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union_distinct">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.union_distinct()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.unnest_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.unnest_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_column()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column_renamed">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_column_renamed()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_csv">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_csv()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_json">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_json()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_parquet()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet_with_options">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_parquet_with_options()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_table">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_table()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.df">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.df
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrameWriteOptions">
<code class="docutils literal notranslate">
<span class="pre">
DataFrameWriteOptions
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrameWriteOptions._raw_write_options">
<code class="docutils literal notranslate">
<span class="pre">
DataFrameWriteOptions._raw_write_options
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp">
<code class="docutils literal notranslate">
<span class="pre">
InsertOp
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.APPEND">
<code class="docutils literal notranslate">
<span class="pre">
InsertOp.APPEND
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.OVERWRITE">
<code class="docutils literal notranslate">
<span class="pre">
InsertOp.OVERWRITE
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.REPLACE">
<code class="docutils literal notranslate">
<span class="pre">
InsertOp.REPLACE
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.compression">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.compression
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.encoding">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.statistics_enabled
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.allow_single_file_parallelism
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_on_write
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_index_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_specific_options
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.created_by">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.created_by
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_page_row_count_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_pagesize_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_page_size_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.encoding">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.max_row_group_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_buffered_record_batches_per_stream
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_parallel_row_group_writers
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.skip_arrow_metadata
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.write_batch_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.writer_version">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.writer_version
</span>
</code>
</a>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<section id="module-datafusion.dataframe">
<span id="datafusion-dataframe"></span><h1>datafusion.dataframe<a class="headerlink" href="#module-datafusion.dataframe" title="Link to this heading"></a></h1>
<p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> is one of the core concepts in DataFusion.</p>
<p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p>
<section id="classes">
<h2>Classes<a class="headerlink" href="#classes" title="Link to this heading"></a></h2>
<table class="autosummary longtable table autosummary">
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Compression</span></code></a></p></td>
<td><p>Enum representing the available compression types for Parquet files.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame</span></code></a></p></td>
<td><p>Two dimensional table representation of data.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriteOptions</span></code></a></p></td>
<td><p>Writer options for DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.InsertOp" title="datafusion.dataframe.InsertOp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InsertOp</span></code></a></p></td>
<td><p>Insert operation mode.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetColumnOptions</span></code></a></p></td>
<td><p>Parquet options for individual columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code></a></p></td>
<td><p>Advanced parquet writer options.</p></td>
</tr>
</tbody>
</table>
</section>
<section id="module-contents">
<h2>Module Contents<a class="headerlink" href="#module-contents" title="Link to this heading"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">Compression</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwds</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.Compression" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-obj docutils literal notranslate"><span class="pre">enum.Enum</span></code></p>
<p>Enum representing the available compression types for Parquet files.</p>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.from_str">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_str</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.from_str" title="Link to this definition"></a></dt>
<dd><p>Convert a string to a Compression enum value.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>value</strong> – The string representation of the compression type.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The Compression enum member whose (lowercase) value matches the string.</p>
</dd>
<dt class="field-odd">Raises<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>ValueError</strong> – If the string does not match any Compression enum value.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.get_default_level">
<span class="sig-name descname"><span class="pre">get_default_level</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.get_default_level" title="Link to this definition"></a></dt>
<dd><p>Get the default compression level for the compression type.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The default compression level for the compression type.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.BROTLI">
<span class="sig-name descname"><span class="pre">BROTLI</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'brotli'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.BROTLI" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.GZIP">
<span class="sig-name descname"><span class="pre">GZIP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'gzip'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.GZIP" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4">
<span class="sig-name descname"><span class="pre">LZ4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4_RAW">
<span class="sig-name descname"><span class="pre">LZ4_RAW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4_raw'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4_RAW" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.SNAPPY">
<span class="sig-name descname"><span class="pre">SNAPPY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'snappy'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.SNAPPY" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.UNCOMPRESSED">
<span class="sig-name descname"><span class="pre">UNCOMPRESSED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'uncompressed'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.UNCOMPRESSED" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.ZSTD">
<span class="sig-name descname"><span class="pre">ZSTD</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'zstd'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.ZSTD" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">DataFrame</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">datafusion._internal.DataFrame</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.DataFrame" title="Link to this definition"></a></dt>
<dd><p>Two dimensional table representation of data.</p>
<p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p>
<p>This constructor is not to be used by the end user.</p>
<p>See <a class="reference internal" href="../context/index.html#datafusion.context.SessionContext" title="datafusion.context.SessionContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">SessionContext</span></code></a> for methods to
create a <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__arrow_c_stream__">
<span class="sig-name descname"><span class="pre">__arrow_c_stream__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">requested_schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">object</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">object</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__" title="Link to this definition"></a></dt>
<dd><p>Export an Arrow PyCapsule Stream.</p>
<p>This will execute and collect the DataFrame. We will attempt to respect the
requested schema, but only trivial transformations will be applied such as only
returning the fields listed in the requested schema if their data types match
those in the DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>requested_schema</strong> – Attempt to provide the DataFrame using this schema.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Arrow PyCapsule object.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__getitem__">
<span class="sig-name descname"><span class="pre">__getitem__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">key</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__getitem__" title="Link to this definition"></a></dt>
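<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the specified column or columns.</p>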
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>key</strong> – Column name or list of column names to select.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the specified column or columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__repr__">
<span class="sig-name descname"><span class="pre">__repr__</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__repr__" title="Link to this definition"></a></dt>
<dd><p>Return a string representation of the DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>String representation of the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame._repr_html_">
<span class="sig-name descname"><span class="pre">_repr_html_</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame._repr_html_" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.aggregate">
<span class="sig-name descname"><span class="pre">aggregate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">group_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aggs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" 
href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.aggregate" title="Link to this definition"></a></dt>
<dd><p>Aggregates the rows of the current DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>group_by</strong> – Sequence of expressions or column names to group by.</p></li>
<li><p><strong>aggs</strong> – Sequence of expressions to aggregate.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after aggregation.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cache">
<span class="sig-name descname"><span class="pre">cache</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cache" title="Link to this definition"></a></dt>
<dd><p>Cache the DataFrame as a memory table.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Cached DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cast">
<span class="sig-name descname"><span class="pre">cast</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">pyarrow.DataType</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cast" title="Link to this definition"></a></dt>
<dd><p>Cast one or more columns to a different data type.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>mapping</strong> – Mapping with column name as key and target data type as value.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after casting columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect">
<span class="sig-name descname"><span class="pre">collect</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect" title="Link to this definition"></a></dt>
<dd><p>Execute this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and collect results into memory.</p>
<p>Prior to calling <code class="docutils literal notranslate"><span class="pre">collect</span></code>, modifying a DataFrame simply updates a plan
(no actual computation is performed). Calling <code class="docutils literal notranslate"><span class="pre">collect</span></code> triggers the
computation.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> collected from the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect_partitioned">
<span class="sig-name descname"><span class="pre">collect_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect_partitioned" title="Link to this definition"></a></dt>
<dd><p>Execute this DataFrame and collect all partitioned results.</p>
<p>This operation returns <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> maintaining the input
partitioning.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of lists of <code class="xref py py-class docutils literal notranslate"><span class="pre">RecordBatch</span></code> collected from the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.count">
<span class="sig-name descname"><span class="pre">count</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.count" title="Link to this definition"></a></dt>
<dd><p>Return the total number of rows in this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>Note that this method will actually run a plan to calculate the
count, which may be slow for large or complicated DataFrames.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Number of rows in the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.default_str_repr">
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">default_str_repr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">batches</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">pyarrow.Schema</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_more</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">table_uuid</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.default_str_repr" title="Link to this definition"></a></dt>
<dd><p>Return the default string representation of a DataFrame.</p>
<p>This method is used by the default formatter and implemented in Rust for
performance reasons.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.describe">
<span class="sig-name descname"><span class="pre">describe</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.describe" title="Link to this definition"></a></dt>
<dd><p>Return the statistics for this DataFrame.</p>
<p>Only numeric datatypes are summarized at the moment; nulls are returned
for non-numeric datatypes.</p>
<p>The output format is modeled after pandas.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>A summary DataFrame containing statistics.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.distinct">
<span class="sig-name descname"><span class="pre">distinct</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.distinct" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with all duplicated rows removed.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>DataFrame after removing duplicates.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.drop">
<span class="sig-name descname"><span class="pre">drop</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.drop" title="Link to this definition"></a></dt>
<dd><p>Drop an arbitrary number of columns.</p>
<p>Column names are case-sensitive and, unlike in other operations such as
<cite>select</cite>, do not require double quotes. Leading and trailing double quotes
are allowed and will be automatically stripped if present.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>columns</strong> – Column names to drop from the dataframe. Both <code class="docutils literal notranslate"><span class="pre">column_name</span></code>
and <code class="docutils literal notranslate"><span class="pre">&quot;column_name&quot;</span></code> are accepted.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with those columns removed in the projection.</p>
</dd>
</dl>
<p>Example Usage:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">&#39;ID_For_Students&#39;</span><span class="p">)</span> <span class="c1"># Works</span>
<span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">&#39;&quot;ID_For_Students&quot;&#39;</span><span class="p">)</span> <span class="c1"># Also works (quotes stripped)</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.except_all">
<span class="sig-name descname"><span class="pre">except_all</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.except_all" title="Link to this definition"></a></dt>
<dd><p>Calculate the exception of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to calculate exception with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after exception.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream">
<span class="sig-name descname"><span class="pre">execute_stream</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream" title="Link to this definition"></a></dt>
<dd><p>Executes this DataFrame and returns a stream over a single partition.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Record Batch Stream over a single partition.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream_partitioned">
<span class="sig-name descname"><span class="pre">execute_stream_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned" title="Link to this definition"></a></dt>
<dd><p>Executes this DataFrame and returns a stream for each partition.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>One record batch stream per partition.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execution_plan">
<span class="sig-name descname"><span class="pre">execution_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.ExecutionPlan" title="datafusion.plan.ExecutionPlan"><span class="pre">datafusion.plan.ExecutionPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execution_plan" title="Link to this definition"></a></dt>
<dd><p>Return the execution/physical plan.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Execution plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.explain">
<span class="sig-name descname"><span class="pre">explain</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.explain" title="Link to this definition"></a></dt>
<dd><p>Print an explanation of the DataFrame’s plan so far.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">analyze</span></code> is specified, runs the plan and reports metrics.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>verbose</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, more details will be included.</p></li>
<li><p><strong>analyze</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, the plan will be run and its metrics reported.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.fill_null">
<span class="sig-name descname"><span class="pre">fill_null</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">subset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.fill_null" title="Link to this definition"></a></dt>
<dd><p>Fill null values in specified columns with a value.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>value</strong> – Value to replace nulls with. Will be cast to match column type.</p></li>
<li><p><strong>subset</strong> – Optional list of column names to fill. If None, fills all columns.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with null values replaced where type casting is possible.</p>
</dd>
</dl>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># Fill all nulls with 0 where possible</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Fill nulls in specific string columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="s2">&quot;missing&quot;</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;category&quot;</span><span class="p">])</span>
</pre></div>
</div>
<p class="rubric">Notes</p>
<ul class="simple">
<li><p>Only fills nulls in columns where the value can be cast to the column type</p></li>
<li><p>For columns where casting fails, the original column is kept unchanged</p></li>
<li><p>For columns not in subset, the original column is kept unchanged</p></li>
</ul>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.filter">
<span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">predicates</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.filter" title="Link to this definition"></a></dt>
<dd><p>Return a DataFrame for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p>
<p>Rows for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">False</span></code> or <code class="docutils literal notranslate"><span class="pre">None</span></code> are filtered
out. If more than one predicate is provided, these predicates will be
combined as a logical AND. Each <code class="docutils literal notranslate"><span class="pre">predicate</span></code> must be an
<a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> created using helper functions such as
<a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>.
If more complex logic is required, see the logical operations in
<a class="reference internal" href="../functions/index.html#module-datafusion.functions" title="datafusion.functions"><code class="xref py py-mod docutils literal notranslate"><span class="pre">functions</span></code></a>.</p>
<p>Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span>
<span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>predicates</strong> – Predicate expression(s) to filter the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after filtering.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.head">
<span class="sig-name descname"><span class="pre">head</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.head" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the head of the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.intersect">
<span class="sig-name descname"><span class="pre">intersect</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.intersect" title="Link to this definition"></a></dt>
<dd><p>Calculate the intersection of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to intersect with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after intersection.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.into_view">
<span class="sig-name descname"><span class="pre">into_view</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../catalog/index.html#datafusion.catalog.Table" title="datafusion.catalog.Table"><span class="pre">datafusion.catalog.Table</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.into_view" title="Link to this definition"></a></dt>
<dd><p>Convert <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> into a <a class="reference internal" href="../index.html#datafusion.Table" title="datafusion.Table"><code class="xref py py-class docutils literal notranslate"><span class="pre">Table</span></code></a>.</p>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">SessionContext</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT 1 AS value&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">view</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">into_view</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ctx</span><span class="o">.</span><span class="n">register_table</span><span class="p">(</span><span class="s2">&quot;values_view&quot;</span><span class="p">,</span> <span class="n">view</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="c1"># The DataFrame is still usable</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ctx</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">&quot;SELECT value FROM values_view&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join" title="Link to this definition"></a></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span 
class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt>
<dd><p>Join this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with another <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>Either <cite>on</cite> must be provided, or both <cite>left_on</cite> and <cite>right_on</cite> must be provided together.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>right</strong> – Other DataFrame to join with.</p></li>
<li><p><strong>on</strong> – Column names to join on in both dataframes.</p></li>
<li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”,
“right”, “full”, “semi”, “anti”.</p></li>
<li><p><strong>left_on</strong> – Join column of the left dataframe.</p></li>
<li><p><strong>right_on</strong> – Join column of the right dataframe.</p></li>
<li><p><strong>join_keys</strong> – Tuple of two lists of column names to join on. [Deprecated]</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after join.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join_on">
<span class="sig-name descname"><span class="pre">join_on</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">on_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em><span 
class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join_on" title="Link to this definition"></a></dt>
<dd><p>Join two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified expressions.</p>
<p>Join predicates must be <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> objects, typically
built with <a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a>. On expressions are used to support
inequality predicates. Equality predicates are correctly optimized.</p>
<p>Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span>
<span class="n">df</span><span class="o">.</span><span class="n">join_on</span><span class="p">(</span><span class="n">other_df</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;id&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;other_id&quot;</span><span class="p">))</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>right</strong> – Other DataFrame to join with.</p></li>
<li><p><strong>on_exprs</strong> – One or more equality or inequality predicates.</p></li>
<li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”,
“right”, “full”, “semi”, “anti”.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after join.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.limit">
<span class="sig-name descname"><span class="pre">limit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.limit" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>count</strong> – Number of rows to limit the DataFrame to.</p></li>
<li><p><strong>offset</strong> – Number of rows to skip.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.logical_plan">
<span class="sig-name descname"><span class="pre">logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.logical_plan" title="Link to this definition"></a></dt>
<dd><p>Return the unoptimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Unoptimized logical plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.optimized_logical_plan">
<span class="sig-name descname"><span class="pre">optimized_logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.optimized_logical_plan" title="Link to this definition"></a></dt>
<dd><p>Return the optimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Optimized logical plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.parse_sql_expr">
<span class="sig-name descname"><span class="pre">parse_sql_expr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">expr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.parse_sql_expr" title="Link to this definition"></a></dt>
<dd><p>Create a logical expression from a SQL expression string.</p>
<p>The expression is created and processed against the current schema.</p>
<p>Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span>
<span class="n">df</span><span class="o">.</span><span class="n">parse_sql_expr</span><span class="p">(</span><span class="s2">&quot;a &gt; 1&quot;</span><span class="p">)</span>
<span class="c1"># should produce:</span>
<span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>expr</strong> – Expression string to be converted to a DataFusion expression.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Logical expression.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition">
<span class="sig-name descname"><span class="pre">repartition</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition" title="Link to this definition"></a></dt>
<dd><p>Repartition a DataFrame into <code class="docutils literal notranslate"><span class="pre">num</span></code> partitions.</p>
<p>Batches are allocated to partitions using a round-robin algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Repartitioned DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition_by_hash">
<span class="sig-name descname"><span class="pre">repartition_by_hash</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition_by_hash" title="Link to this definition"></a></dt>
<dd><p>Repartition a DataFrame using a hash partitioning scheme.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong> – Expressions to evaluate and perform hashing on.</p></li>
<li><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Repartitioned DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.schema">
<span class="sig-name descname"><span class="pre">schema</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Schema</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.schema" title="Link to this definition"></a></dt>
<dd><p>Return the <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.Schema</span></code> of this DataFrame.</p>
<p>The output schema contains information on the name, data type, and
nullability for each column.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Schema describing the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select" title="Link to this definition"></a></dt>
<dd><p>Project arbitrary expressions into a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>exprs</strong> – Either column names or <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> to select.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after projection. It has one column for each expression.</p>
</dd>
</dl>
<p>Example usage:</p>
<p>The following example will return 3 columns from the original dataframe.
The first two columns will be the original columns <code class="docutils literal notranslate"><span class="pre">a</span></code> and <code class="docutils literal notranslate"><span class="pre">b</span></code> since the
string “a” is assumed to refer to column selection. Also a duplicate of
column <code class="docutils literal notranslate"><span class="pre">a</span></code> will be returned with the column name <code class="docutils literal notranslate"><span class="pre">alternate_a</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;alternate_a&quot;</span><span class="p">))</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select_columns">
<span class="sig-name descname"><span class="pre">select_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select_columns" title="Link to this definition"></a></dt>
<dd><p>Filter the DataFrame by columns.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>DataFrame only containing the specified columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.show">
<span class="sig-name descname"><span class="pre">show</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.show" title="Link to this definition"></a></dt>
<dd><p>Execute the DataFrame and print the result to the console.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>num</strong> – Number of lines to show.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.sort">
<span class="sig-name descname"><span class="pre">sort</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">datafusion.expr.SortKey</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.sort" title="Link to this definition"></a></dt>
<dd><p>Sort the DataFrame by the specified sorting expressions or column names.</p>
<p>Note that any expression can be turned into a sort expression by
calling its <code class="docutils literal notranslate"><span class="pre">sort</span></code> method.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>exprs</strong> – Sort expressions or column names, applied in order.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after sorting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.tail">
<span class="sig-name descname"><span class="pre">tail</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.tail" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<p>Be aware that this can be expensive, since the number of rows in the
DataFrame must be determined first. This is done by collecting it.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the tail of the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_arrow_table">
<span class="sig-name descname"><span class="pre">to_arrow_table</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Table</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_arrow_table" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into an Arrow Table.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Arrow Table.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pandas">
<span class="sig-name descname"><span class="pre">to_pandas</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pandas.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pandas" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Pandas DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Pandas DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_polars">
<span class="sig-name descname"><span class="pre">to_polars</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">polars.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_polars" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Polars DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Polars DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pydict">
<span class="sig-name descname"><span class="pre">to_pydict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pydict" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a dictionary of lists.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Dictionary of lists.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pylist">
<span class="sig-name descname"><span class="pre">to_pylist</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pylist" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a list of dictionaries.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of dictionaries.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Callable</span><span class="p"><span class="pre">[</span></span><span class="pre">Ellipsis</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.transform" title="Link to this definition"></a></dt>
<dd><p>Apply a function to the current DataFrame which returns another DataFrame.</p>
<p>This is useful for chaining together multiple functions. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">add_3</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
    <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">with_column</span><span class="p">(</span><span class="s2">&quot;modified&quot;</span><span class="p">,</span> <span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
<span class="k">def</span><span class="w"> </span><span class="nf">within_limit</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
    <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">add_3</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">within_limit</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>func</strong> – A callable function that takes a DataFrame as its first argument</p></li>
<li><p><strong>args</strong> – Zero or more arguments to pass to <cite>func</cite></p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after applying <cite>func</cite> to the original DataFrame.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame">DataFrame</a></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union">
<span class="sig-name descname"><span class="pre">union</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">distinct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union" title="Link to this definition"></a></dt>
<dd><p>Calculate the union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>other</strong> – DataFrame to union with.</p></li>
<li><p><strong>distinct</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, duplicate rows will be removed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after union.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union_distinct">
<span class="sig-name descname"><span class="pre">union_distinct</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union_distinct" title="Link to this definition"></a></dt>
<dd><p>Calculate the distinct union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.
Any duplicate rows are discarded.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to union with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after union.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.unnest_columns">
<span class="sig-name descname"><span class="pre">unnest_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">preserve_nulls</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.unnest_columns" title="Link to this definition"></a></dt>
<dd><p>Expand columns of arrays into a single row per array element.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>columns</strong> – Column names to perform unnest operation on.</p></li>
<li><p><strong>preserve_nulls</strong> – If False, rows with null entries will not be
returned.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A DataFrame with the columns expanded.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column">
<span class="sig-name descname"><span class="pre">with_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">expr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column" title="Link to this definition"></a></dt>
<dd><p>Add an additional column to the DataFrame.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">expr</span></code> must be an <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> constructed with
<a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>.</p>
<p>Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span>
<span class="n">df</span><span class="o">.</span><span class="n">with_column</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">+</span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>name</strong> – Name of the column to add.</p></li>
<li><p><strong>expr</strong> – Expression to compute the column.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the new column.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column_renamed">
<span class="sig-name descname"><span class="pre">with_column_renamed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">old_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">new_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column_renamed" title="Link to this definition"></a></dt>
<dd><p>Rename one column by applying a new projection.</p>
<p>This is a no-op if the column to be renamed does not exist.</p>
<p>The method supports case-sensitive renaming by wrapping the column name
in one of the following symbols (&quot; or ' or `).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>old_name</strong> – Old column name.</p></li>
<li><p><strong>new_name</strong> – New column name.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the column renamed.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_columns">
<span class="sig-name descname"><span class="pre">with_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">named_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_columns" title="Link to this definition"></a></dt>
<dd><p>Add columns to the DataFrame.</p>
<p>Columns can be added by passing expressions, iterables of expressions, or named expressions.
All expressions must be <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> objects created via
<a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>.
To pass named expressions use the form <code class="docutils literal notranslate"><span class="pre">name=Expr</span></code>.</p>
<p>Example usage: The following will add 4 columns labeled <code class="docutils literal notranslate"><span class="pre">a</span></code>, <code class="docutils literal notranslate"><span class="pre">b</span></code>, <code class="docutils literal notranslate"><span class="pre">c</span></code>,
and <code class="docutils literal notranslate"><span class="pre">d</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">with_columns</span><span class="p">(</span>
<span class="n">col</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">),</span>
<span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;y&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;c&quot;</span><span class="p">)],</span>
<span class="n">d</span><span class="o">=</span><span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
<span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong> – Either a single expression or an iterable of expressions to add.</p></li>
<li><p><strong>named_exprs</strong> – Named expressions in the form of <code class="docutils literal notranslate"><span class="pre">name=expr</span></code>.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the new columns added.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_csv">
<span class="sig-name descname"><span class="pre">write_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">with_header</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_csv" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a CSV file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the CSV file to write.</p></li>
<li><p><strong>with_header</strong> – If true, output the CSV header row.</p></li>
<li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_json">
<span class="sig-name descname"><span class="pre">write_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_json" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a JSON file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the JSON file to write.</p></li>
<li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span 
class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet" title="Link to this definition"></a></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">Compression.ZSTD</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span 
class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span 
class="pre">None</span></span></span></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p>
<p>Available compression types are:</p>
<ul class="simple">
<li><p>“uncompressed”: No compression.</p></li>
<li><p>“snappy”: Snappy compression.</p></li>
<li><p>“gzip”: Gzip compression.</p></li>
<li><p>“brotli”: Brotli compression.</p></li>
<li><p>“lz4”: LZ4 compression.</p></li>
<li><p>“lz4_raw”: LZ4_RAW compression.</p></li>
<li><p>“zstd”: Zstandard compression.</p></li>
</ul>
<p>LZO compression is not yet implemented in arrow-rs and is therefore
excluded.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the Parquet file to write.</p></li>
<li><p><strong>compression</strong> – Compression type to use. Default is “ZSTD”.</p></li>
<li><p><strong>compression_level</strong> – Compression level to use. For ZSTD, the
recommended range is 1 to 22, with the default being 4. Higher levels
provide better compression but slower speed.</p></li>
<li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet_with_options">
<span class="sig-name descname"><span class="pre">write_parquet_with_options</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet_with_options" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p>
<p>Allows advanced writer options to be set with <code class="docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the Parquet file to write.</p></li>
<li><p><strong>options</strong> – Sets the Parquet writer options (see <code class="docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code>).</p></li>
<li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_table">
<span class="sig-name descname"><span class="pre">write_table</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_table" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a table.</p>
<p>The table must be registered with the session to perform this operation.
Not all table providers support writing operations. See the individual
implementations for details.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.df">
<span class="sig-name descname"><span class="pre">df</span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.df" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrameWriteOptions">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">DataFrameWriteOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">insert_operation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.InsertOp" title="datafusion.dataframe.InsertOp"><span class="pre">InsertOp</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">single_file_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">partition_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sort_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.SortExpr" title="datafusion.expr.SortExpr"><span class="pre">datafusion.expr.SortExpr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.SortExpr" title="datafusion.expr.SortExpr"><span class="pre">datafusion.expr.SortExpr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.DataFrameWriteOptions" title="Link to this definition"></a></dt>
<dd><p>Writer options for DataFrame.</p>
<p>There is no guarantee the table provider supports all writer options.
See the individual implementation and documentation for details.</p>
<p>Instantiate writer options for DataFrame.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrameWriteOptions._raw_write_options">
<span class="sig-name descname"><span class="pre">_raw_write_options</span></span><a class="headerlink" href="#datafusion.dataframe.DataFrameWriteOptions._raw_write_options" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.InsertOp">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">InsertOp</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwds</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.InsertOp" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-obj docutils literal notranslate"><span class="pre">enum.Enum</span></code></p>
<p>Insert operation mode.</p>
<p>These modes are used by the table writing feature to define how record
batches should be written to a table.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.APPEND">
<span class="sig-name descname"><span class="pre">APPEND</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.APPEND" title="Link to this definition"></a></dt>
<dd><p>Appends new rows to the existing table without modifying any existing rows.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.OVERWRITE">
<span class="sig-name descname"><span class="pre">OVERWRITE</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.OVERWRITE" title="Link to this definition"></a></dt>
<dd><p>Overwrites all existing rows in the table with the new rows.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.REPLACE">
<span class="sig-name descname"><span class="pre">REPLACE</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.REPLACE" title="Link to this definition"></a></dt>
<dd><p>Replaces existing rows that collide with the inserted rows.</p>
<p>Replacement is typically based on a unique key or primary key.</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetColumnOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span 
class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions" title="Link to this definition"></a></dt>
<dd><p>Parquet options for individual columns.</p>
<p>Contains the available options that can be applied for an individual Parquet column,
replacing the global options in <code class="docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code>.</p>
<p>Initialize the ParquetColumnOptions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>encoding</strong> – Sets encoding for the column path. Valid values are: <code class="docutils literal notranslate"><span class="pre">plain</span></code>,
<code class="docutils literal notranslate"><span class="pre">plain_dictionary</span></code>, <code class="docutils literal notranslate"><span class="pre">rle</span></code>, <code class="docutils literal notranslate"><span class="pre">bit_packed</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_binary_packed</span></code>,
<code class="docutils literal notranslate"><span class="pre">delta_length_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">rle_dictionary</span></code>,
and <code class="docutils literal notranslate"><span class="pre">byte_stream_split</span></code>. These values are not case-sensitive. If
<code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li>
<li><p><strong>dictionary_enabled</strong> – Sets if dictionary encoding is enabled for the column
path. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li>
<li><p><strong>compression</strong> – Sets default parquet compression codec for the column path.
Valid values are <code class="docutils literal notranslate"><span class="pre">uncompressed</span></code>, <code class="docutils literal notranslate"><span class="pre">snappy</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip(level)</span></code>, <code class="docutils literal notranslate"><span class="pre">lzo</span></code>,
<code class="docutils literal notranslate"><span class="pre">brotli(level)</span></code>, <code class="docutils literal notranslate"><span class="pre">lz4</span></code>, <code class="docutils literal notranslate"><span class="pre">zstd(level)</span></code>, and <code class="docutils literal notranslate"><span class="pre">lz4_raw</span></code>. These
values are not case-sensitive. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet
options.</p></li>
<li><p><strong>statistics_enabled</strong> – Sets if statistics are enabled for the column. Valid
values are: <code class="docutils literal notranslate"><span class="pre">none</span></code>, <code class="docutils literal notranslate"><span class="pre">chunk</span></code>, and <code class="docutils literal notranslate"><span class="pre">page</span></code>. These values are not
case-sensitive. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li>
<li><p><strong>bloom_filter_enabled</strong> – Sets if bloom filter is enabled for the column path.
If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li>
<li><p><strong>bloom_filter_fpp</strong> – Sets bloom filter false positive probability for the
column path. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li>
<li><p><strong>bloom_filter_ndv</strong> – Sets bloom filter number of distinct values. If <code class="docutils literal notranslate"><span class="pre">None</span></code>,
uses the default parquet options.</p></li>
</ul>
</dd>
</dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled">
<span class="sig-name descname"><span class="pre">bloom_filter_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.compression">
<span class="sig-name descname"><span class="pre">compression</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.compression" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.dictionary_enabled">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.encoding">
<span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.encoding" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.statistics_enabled">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetWriterOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data_pagesize_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">writer_version</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'1.0'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">skip_arrow_metadata</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'zstd(3)'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_page_size_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'page'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_row_group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">created_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'datafusion-python'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_index_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span 
class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_page_row_count_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_on_write</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">allow_single_file_parallelism</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_parallel_row_group_writers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_specific_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span 
class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><span class="pre">ParquetColumnOptions</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions" title="Link to this definition"></a></dt>
<dd><p>Advanced parquet writer options.</p>
<p>Allows setting the writer options that apply to the entire file. Some options can
also be set on a column by column basis, with the field <code class="docutils literal notranslate"><span class="pre">column_specific_options</span></code>
(see <code class="docutils literal notranslate"><span class="pre">ParquetColumnOptions</span></code>).</p>
<p>Initialize the ParquetWriterOptions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>data_pagesize_limit</strong> – Sets best effort maximum size of data page in bytes.</p></li>
<li><p><strong>write_batch_size</strong> – Sets write_batch_size in bytes.</p></li>
<li><p><strong>writer_version</strong> – Sets parquet writer version. Valid values are <code class="docutils literal notranslate"><span class="pre">1.0</span></code> and
<code class="docutils literal notranslate"><span class="pre">2.0</span></code>.</p></li>
<li><p><strong>skip_arrow_metadata</strong> – Skip encoding the embedded arrow metadata in the
KV_meta.</p></li>
<li><p><strong>compression</strong> – Compression type to use. Default is <code class="docutils literal notranslate"><span class="pre">zstd(3)</span></code>.
Available compression types are:</p>
<ul>
<li><p><code class="docutils literal notranslate"><span class="pre">uncompressed</span></code>: No compression.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">snappy</span></code>: Snappy compression.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">gzip(n)</span></code>: Gzip compression with level n.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">brotli(n)</span></code>: Brotli compression with level n.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">lz4</span></code>: LZ4 compression.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">lz4_raw</span></code>: LZ4_RAW compression.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">zstd(n)</span></code>: Zstandard compression with level n.</p></li>
</ul>
</li>
<li><p><strong>compression_level</strong> – Compression level to set.</p></li>
<li><p><strong>dictionary_enabled</strong> – Sets if dictionary encoding is enabled. If <code class="docutils literal notranslate"><span class="pre">None</span></code>,
uses the default parquet writer setting.</p></li>
<li><p><strong>dictionary_page_size_limit</strong> – Sets best effort maximum dictionary page size,
in bytes.</p></li>
<li><p><strong>statistics_enabled</strong> – Sets if statistics are enabled for any column. Valid
values are <code class="docutils literal notranslate"><span class="pre">none</span></code>, <code class="docutils literal notranslate"><span class="pre">chunk</span></code>, and <code class="docutils literal notranslate"><span class="pre">page</span></code>. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the
default parquet writer setting.</p></li>
<li><p><strong>max_row_group_size</strong> – Target maximum number of rows in each row group
(defaults to 1M rows). Writing larger row groups requires more memory
to write, but can get better compression and be faster to read.</p></li>
<li><p><strong>created_by</strong> – Sets “created by” property.</p></li>
<li><p><strong>column_index_truncate_length</strong> – Sets column index truncate length.</p></li>
<li><p><strong>statistics_truncate_length</strong> – Sets statistics truncate length. If <code class="docutils literal notranslate"><span class="pre">None</span></code>,
uses the default parquet writer setting.</p></li>
<li><p><strong>data_page_row_count_limit</strong> – Sets best effort maximum number of rows in a data
page.</p></li>
<li><p><strong>encoding</strong> – Sets default encoding for any column. Valid values are <code class="docutils literal notranslate"><span class="pre">plain</span></code>,
<code class="docutils literal notranslate"><span class="pre">plain_dictionary</span></code>, <code class="docutils literal notranslate"><span class="pre">rle</span></code>, <code class="docutils literal notranslate"><span class="pre">bit_packed</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_binary_packed</span></code>,
<code class="docutils literal notranslate"><span class="pre">delta_length_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">rle_dictionary</span></code>,
and <code class="docutils literal notranslate"><span class="pre">byte_stream_split</span></code>. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet writer
setting.</p></li>
<li><p><strong>bloom_filter_on_write</strong> – Write bloom filters for all columns when creating
parquet files.</p></li>
<li><p><strong>bloom_filter_fpp</strong> – Sets bloom filter false positive probability. If <code class="docutils literal notranslate"><span class="pre">None</span></code>,
uses the default parquet writer setting.</p></li>
<li><p><strong>bloom_filter_ndv</strong> – Sets bloom filter number of distinct values. If <code class="docutils literal notranslate"><span class="pre">None</span></code>,
uses the default parquet writer setting.</p></li>
<li><p><strong>allow_single_file_parallelism</strong> – Controls whether DataFusion will attempt to
speed up writing parquet files by serializing them in parallel. Each
column in each row group in each output file is serialized in parallel
leveraging a maximum possible core count of
<code class="docutils literal notranslate"><span class="pre">n_files</span> <span class="pre">*</span> <span class="pre">n_row_groups</span> <span class="pre">*</span> <span class="pre">n_columns</span></code>.</p></li>
<li><p><strong>maximum_parallel_row_group_writers</strong> – By default parallel parquet writer is
tuned for minimum memory usage in a streaming execution plan. You may
see a performance benefit when writing large parquet files by increasing
<code class="docutils literal notranslate"><span class="pre">maximum_parallel_row_group_writers</span></code> and
<code class="docutils literal notranslate"><span class="pre">maximum_buffered_record_batches_per_stream</span></code> if your system has idle
cores and can tolerate additional memory usage. Boosting these values is
likely worthwhile when writing out already in-memory data, such as from
a cached data frame.</p></li>
<li><p><strong>maximum_buffered_record_batches_per_stream</strong> – See
<code class="docutils literal notranslate"><span class="pre">maximum_parallel_row_group_writers</span></code>.</p></li>
<li><p><strong>column_specific_options</strong> – Overrides options for specific columns. If a column
is not a part of this dictionary, it will use the parameters provided
here.</p></li>
</ul>
</dd>
</dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism">
<span class="sig-name descname"><span class="pre">allow_single_file_parallelism</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write">
<span class="sig-name descname"><span class="pre">bloom_filter_on_write</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length">
<span class="sig-name descname"><span class="pre">column_index_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">64</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_specific_options">
<span class="sig-name descname"><span class="pre">column_specific_options</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.created_by">
<span class="sig-name descname"><span class="pre">created_by</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'datafusion-python'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.created_by" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit">
<span class="sig-name descname"><span class="pre">data_page_row_count_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">20000</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit">
<span class="sig-name descname"><span class="pre">data_pagesize_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_enabled">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit">
<span class="sig-name descname"><span class="pre">dictionary_page_size_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.encoding">
<span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.encoding" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.max_row_group_size">
<span class="sig-name descname"><span class="pre">max_row_group_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream">
<span class="sig-name descname"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers">
<span class="sig-name descname"><span class="pre">maximum_parallel_row_group_writers</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata">
<span class="sig-name descname"><span class="pre">skip_arrow_metadata</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_enabled">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'page'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length">
<span class="sig-name descname"><span class="pre">statistics_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.write_batch_size">
<span class="sig-name descname"><span class="pre">write_batch_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1024</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.writer_version">
<span class="sig-name descname"><span class="pre">writer_version</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'1.0'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.writer_version" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
</section>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
<a class='left-prev' id="prev-link" href="../context/index.html" title="previous page">
<i class="fas fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">datafusion.context</p>
</div>
</a>
<a class='right-next' id="next-link" href="../dataframe_formatter/index.html" title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">datafusion.dataframe_formatter</p>
</div>
<i class="fas fa-angle-right"></i>
</a>
</div>
</main>
</div>
</div>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<!-- Based on pydata_sphinx_theme/footer.html -->
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright 2019-2024, Apache Software Foundation.<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.1.3.<br>
</p>
</div>
<div class="footer-item">
<p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo</p>
<p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</div>
</footer>
</body>
</html>