<!-- blob: ba47b9f98346ee98ddfaa59482c9a87f2c201ac2 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="../../../">
<head>
  <meta charset="utf-8">
  <!-- Exactly one viewport declaration; repeating it is redundant. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>datafusion.dataframe &#8212; Apache Arrow DataFusion documentation</title>

  <!-- Stylesheets, listed in cascade order. pydata-sphinx-theme.css is linked
       once, after pygments.css, so its rules keep overriding the sheets
       that precede it. -->
  <link rel="stylesheet" href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf">
  <link rel="stylesheet" href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
  <!-- Font preloads carry crossorigin because font fetches are CORS requests. -->
  <link rel="preload" as="font" type="font/woff2" crossorigin
        href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
  <link rel="preload" as="font" type="font/woff2" crossorigin
        href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
  <link rel="stylesheet" href="../../../_static/pygments.css?v=8f2a1f02">
  <link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css?v=1140d252">
  <link rel="stylesheet" href="../../../_static/graphviz.css?v=4ae1632d">
  <link rel="stylesheet" href="../../../_static/theme_overrides.css?v=dca7052a">

  <!-- Scripts. These stay render-blocking and in this order; scripts outside
       this head may rely on them having run (not verifiable from here). -->
  <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
  <script src="../../../_static/documentation_options.js?v=8a448e45"></script>
  <script src="../../../_static/doctools.js?v=9bcbadda"></script>
  <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>

  <!-- Document relations: index, search, and neighbouring API pages. -->
  <link rel="index" title="Index" href="../../../genindex.html">
  <link rel="search" title="Search" href="../../../search.html">
  <link rel="next" title="datafusion.dataframe_formatter" href="../dataframe_formatter/index.html">
  <link rel="prev" title="datafusion.context" href="../context/index.html">

  <meta name="docsearch:language" content="en">
  <!-- Google Analytics placeholder: no snippet is emitted in this head. -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items">
<!-- Brand logo linking to the documentation root index page. The image is the
     link's only content, so its alt text names the link destination. -->
<a class="navbar-brand" href="../../../index.html">
  <img class="logo" src="../../../_static/images/2x_bgwhite_original.png" alt="Apache Arrow DataFusion documentation home">
</a>
<!-- Site search: sends the query as the "q" parameter to search.html via GET.
     role="search" exposes the form as a search landmark; the magnifier icon is
     decorative and hidden from assistive technology. The input is named by
     aria-label because there is no visible label. -->
<form class="bd-search d-flex align-items-center" action="../../../search.html" method="get" role="search">
  <i class="icon fas fa-search" aria-hidden="true"></i>
  <input class="form-control" id="search-input" name="q" type="search" placeholder="Search the docs ..." autocomplete="off" aria-label="Search the docs ...">
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<!-- Sidebar group: external project links. -->
<p class="caption" role="heading" aria-level="2">
  <span class="caption-text">LINKS</span>
</p>
<ul class="nav bd-sidenav">
  <li class="toctree-l1">
    <a class="reference external" href="https://github.com/apache/datafusion-python">GitHub and Issue Tracker</a>
  </li>
  <li class="toctree-l1">
    <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">Rust's API Docs</a>
  </li>
  <li class="toctree-l1">
    <a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">Code of conduct</a>
  </li>
  <li class="toctree-l1">
    <a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples">Examples</a>
  </li>
</ul>
<!-- Sidebar group: user guide pages. Entries marked has-children carry a
     checkbox + label pair, which the theme presumably uses to expand or
     collapse the nested list (that behaviour lives in the theme CSS, not
     shown here). The label holds only an icon, so each checkbox gets its name
     from aria-label and the icon is hidden from assistive technology. -->
<p class="caption" role="heading" aria-level="2">
  <span class="caption-text">USER GUIDE</span>
</p>
<ul class="nav bd-sidenav">
  <li class="toctree-l1">
    <a class="reference internal" href="../../../user-guide/introduction.html">Introduction</a>
  </li>
  <li class="toctree-l1">
    <a class="reference internal" href="../../../user-guide/basics.html">Concepts</a>
  </li>
  <li class="toctree-l1">
    <a class="reference internal" href="../../../user-guide/data-sources.html">Data Sources</a>
  </li>
  <li class="toctree-l1 has-children">
    <a class="reference internal" href="../../../user-guide/dataframe/index.html">DataFrames</a>
    <input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox" aria-label="Toggle child pages of DataFrames">
    <label for="toctree-checkbox-1">
      <i class="fas fa-chevron-down" aria-hidden="true"></i>
    </label>
    <ul>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/dataframe/rendering.html">HTML Rendering in Jupyter</a>
      </li>
    </ul>
  </li>
  <li class="toctree-l1 has-children">
    <a class="reference internal" href="../../../user-guide/common-operations/index.html">Common Operations</a>
    <input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox" aria-label="Toggle child pages of Common Operations">
    <label for="toctree-checkbox-2">
      <i class="fas fa-chevron-down" aria-hidden="true"></i>
    </label>
    <ul>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/views.html">Registering Views</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/basic-info.html">Basic Operations</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/select-and-filter.html">Column Selections</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/expressions.html">Expressions</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/joins.html">Joins</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/functions.html">Functions</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/aggregations.html">Aggregation</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/windows.html">Window Functions</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/common-operations/udf-and-udfa.html">User-Defined Functions</a>
      </li>
    </ul>
  </li>
  <li class="toctree-l1 has-children">
    <a class="reference internal" href="../../../user-guide/io/index.html">IO</a>
    <input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox" aria-label="Toggle child pages of IO">
    <label for="toctree-checkbox-3">
      <i class="fas fa-chevron-down" aria-hidden="true"></i>
    </label>
    <ul>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/arrow.html">Arrow</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/avro.html">Avro</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/csv.html">CSV</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/json.html">JSON</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/parquet.html">Parquet</a>
      </li>
      <li class="toctree-l2">
        <a class="reference internal" href="../../../user-guide/io/table_provider.html">Custom Table Provider</a>
      </li>
    </ul>
  </li>
  <li class="toctree-l1">
    <a class="reference internal" href="../../../user-guide/configuration.html">Configuration</a>
  </li>
  <li class="toctree-l1">
    <a class="reference internal" href="../../../user-guide/sql.html">SQL</a>
  </li>
</ul>
<!-- Sidebar group: contributor guide pages. -->
<p class="caption" role="heading" aria-level="2">
  <span class="caption-text">CONTRIBUTOR GUIDE</span>
</p>
<ul class="nav bd-sidenav">
  <li class="toctree-l1">
    <a class="reference internal" href="../../../contributor-guide/introduction.html">Introduction</a>
  </li>
  <li class="toctree-l1">
    <a class="reference internal" href="../../../contributor-guide/ffi.html">Python Extensions</a>
  </li>
</ul>
<!-- Sidebar group: API reference tree. The "current" / "active" classes mark
     the path to this page (datafusion.dataframe), and the checkboxes on that
     path are pre-checked. Each checkbox is named via aria-label because its
     label holds only a decorative icon. The link for the page being viewed
     carries aria-current="page". -->
<p class="caption" role="heading" aria-level="2">
  <span class="caption-text">API</span>
</p>
<ul class="current nav bd-sidenav">
  <li class="toctree-l1 current active has-children">
    <a class="reference internal" href="../../index.html">API Reference</a>
    <input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox" checked aria-label="Toggle child pages of API Reference">
    <label for="toctree-checkbox-4">
      <i class="fas fa-chevron-down" aria-hidden="true"></i>
    </label>
    <ul class="current">
      <li class="toctree-l2 current active has-children">
        <a class="reference internal" href="../index.html">datafusion</a>
        <input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox" checked aria-label="Toggle child pages of datafusion">
        <label for="toctree-checkbox-5">
          <i class="fas fa-chevron-down" aria-hidden="true"></i>
        </label>
        <ul class="current">
          <li class="toctree-l3">
            <a class="reference internal" href="../catalog/index.html">datafusion.catalog</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../context/index.html">datafusion.context</a>
          </li>
          <li class="toctree-l3 current active">
            <a class="current reference internal" href="#" aria-current="page">datafusion.dataframe</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../dataframe_formatter/index.html">datafusion.dataframe_formatter</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../expr/index.html">datafusion.expr</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../functions/index.html">datafusion.functions</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../html_formatter/index.html">datafusion.html_formatter</a>
          </li>
          <li class="toctree-l3 has-children">
            <a class="reference internal" href="../input/index.html">datafusion.input</a>
            <input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox" aria-label="Toggle child pages of datafusion.input">
            <label for="toctree-checkbox-6">
              <i class="fas fa-chevron-down" aria-hidden="true"></i>
            </label>
            <ul>
              <li class="toctree-l4">
                <a class="reference internal" href="../input/base/index.html">datafusion.input.base</a>
              </li>
              <li class="toctree-l4">
                <a class="reference internal" href="../input/location/index.html">datafusion.input.location</a>
              </li>
            </ul>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../io/index.html">datafusion.io</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../object_store/index.html">datafusion.object_store</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../plan/index.html">datafusion.plan</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../record_batch/index.html">datafusion.record_batch</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../substrait/index.html">datafusion.substrait</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../unparser/index.html">datafusion.unparser</a>
          </li>
          <li class="toctree-l3">
            <a class="reference internal" href="../user_defined/index.html">datafusion.user_defined</a>
          </li>
        </ul>
      </li>
    </ul>
  </li>
</ul>
</div>
</nav>
</div>
<!-- Container for sidebar-end items; it has no children on this page. -->
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#classes">
Classes
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#module-contents">
Module Contents
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression">
<code class="docutils literal notranslate">
<span class="pre">
Compression
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.from_str">
<code class="docutils literal notranslate">
<span class="pre">
Compression.from_str()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.get_default_level">
<code class="docutils literal notranslate">
<span class="pre">
Compression.get_default_level()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.BROTLI">
<code class="docutils literal notranslate">
<span class="pre">
Compression.BROTLI
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.GZIP">
<code class="docutils literal notranslate">
<span class="pre">
Compression.GZIP
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4">
<code class="docutils literal notranslate">
<span class="pre">
Compression.LZ4
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4_RAW">
<code class="docutils literal notranslate">
<span class="pre">
Compression.LZ4_RAW
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.SNAPPY">
<code class="docutils literal notranslate">
<span class="pre">
Compression.SNAPPY
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.UNCOMPRESSED">
<code class="docutils literal notranslate">
<span class="pre">
Compression.UNCOMPRESSED
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.Compression.ZSTD">
<code class="docutils literal notranslate">
<span class="pre">
Compression.ZSTD
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__arrow_c_stream__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__getitem__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__getitem__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__repr__">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.__repr__()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame._repr_html_">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame._repr_html_()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.aggregate">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.aggregate()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cache">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.cache()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cast">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.cast()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.collect()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect_partitioned">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.collect_partitioned()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.count">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.count()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.default_str_repr">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.default_str_repr()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.describe">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.describe()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.distinct">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.distinct()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.drop">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.drop()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.except_all">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.except_all()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execute_stream()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execute_stream_partitioned()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execution_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.execution_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.explain">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.explain()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.fill_null">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.fill_null()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.filter">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.filter()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.head">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.head()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.intersect">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.intersect()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.into_view">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.into_view()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.join()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join_on">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.join_on()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.limit">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.limit()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.logical_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.logical_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.optimized_logical_plan">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.optimized_logical_plan()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.repartition()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition_by_hash">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.repartition_by_hash()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.schema">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.schema()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.select()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.select_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.show">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.show()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.sort">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.sort()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.tail">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.tail()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_arrow_table">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_arrow_table()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pandas">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pandas()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_polars">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_polars()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pydict">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pydict()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pylist">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.to_pylist()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.transform">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.transform()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.union()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union_distinct">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.union_distinct()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.unnest_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.unnest_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_column()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column_renamed">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_column_renamed()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_columns">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.with_columns()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_csv">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_csv()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_json">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_json()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_parquet()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet_with_options">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.write_parquet_with_options()
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.df">
<code class="docutils literal notranslate">
<span class="pre">
DataFrame.df
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.encoding">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.compression">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.compression
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.statistics_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id0">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id1">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id2">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id3">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.compression
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id4">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id5">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id6">
<code class="docutils literal notranslate">
<span class="pre">
ParquetColumnOptions.statistics_enabled
</span>
</code>
</a>
</li>
</ul>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions
</span>
</code>
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_pagesize_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.write_batch_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.writer_version">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.writer_version
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.skip_arrow_metadata
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.compression">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.compression
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_page_size_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.max_row_group_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.created_by">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.created_by
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_index_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_page_row_count_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.encoding">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_on_write
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.allow_single_file_parallelism
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_parallel_row_group_writers
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_buffered_record_batches_per_stream
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_specific_options
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id7">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.allow_single_file_parallelism
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id8">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_fpp
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id9">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_ndv
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id10">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.bloom_filter_on_write
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id11">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_index_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id12">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.column_specific_options
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id13">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.created_by
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id14">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_page_row_count_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id15">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.data_pagesize_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id16">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id17">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.dictionary_page_size_limit
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id18">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.encoding
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id19">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.max_row_group_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id20">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_buffered_record_batches_per_stream
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id21">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.maximum_parallel_row_group_writers
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id22">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.skip_arrow_metadata
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id23">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_enabled
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id24">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.statistics_truncate_length
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id25">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.write_batch_size
</span>
</code>
</a>
</li>
<li class="toc-h4 nav-item toc-entry">
<a class="reference internal nav-link" href="#id26">
<code class="docutils literal notranslate">
<span class="pre">
ParquetWriterOptions.writer_version
</span>
</code>
</a>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<section id="module-datafusion.dataframe">
<span id="datafusion-dataframe"></span><h1>datafusion.dataframe<a class="headerlink" href="#module-datafusion.dataframe" title="Link to this heading"></a></h1>
<p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> is one of the core concepts in DataFusion.</p>
<p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p>
<section id="classes">
<h2>Classes<a class="headerlink" href="#classes" title="Link to this heading"></a></h2>
<table class="autosummary longtable table autosummary">
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Compression</span></code></a></p></td>
<td><p>Enum representing the available compression types for Parquet files.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame</span></code></a></p></td>
<td><p>Two dimensional table representation of data.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetColumnOptions</span></code></a></p></td>
<td><p>Parquet options for individual columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code></a></p></td>
<td><p>Advanced parquet writer options.</p></td>
</tr>
</tbody>
</table>
</section>
<section id="module-contents">
<h2>Module Contents<a class="headerlink" href="#module-contents" title="Link to this heading"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">Compression</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwds</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.Compression" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-obj docutils literal notranslate"><span class="pre">enum.Enum</span></code></p>
<p>Enum representing the available compression types for Parquet files.</p>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.from_str">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_str</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.from_str" title="Link to this definition"></a></dt>
<dd><p>Convert a string to a Compression enum value.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>value</strong> – The string representation of the compression type.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The matching Compression enum member (member values are lowercase strings).</p>
</dd>
<dt class="field-odd">Raises<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>ValueError</strong> – If the string does not match any Compression enum value.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.get_default_level">
<span class="sig-name descname"><span class="pre">get_default_level</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.get_default_level" title="Link to this definition"></a></dt>
<dd><p>Get the default compression level for the compression type.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The default compression level for the compression type.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.BROTLI">
<span class="sig-name descname"><span class="pre">BROTLI</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'brotli'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.BROTLI" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.GZIP">
<span class="sig-name descname"><span class="pre">GZIP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'gzip'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.GZIP" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4">
<span class="sig-name descname"><span class="pre">LZ4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4_RAW">
<span class="sig-name descname"><span class="pre">LZ4_RAW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4_raw'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4_RAW" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.SNAPPY">
<span class="sig-name descname"><span class="pre">SNAPPY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'snappy'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.SNAPPY" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.UNCOMPRESSED">
<span class="sig-name descname"><span class="pre">UNCOMPRESSED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'uncompressed'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.UNCOMPRESSED" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.Compression.ZSTD">
<span class="sig-name descname"><span class="pre">ZSTD</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'zstd'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.ZSTD" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">DataFrame</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">datafusion._internal.DataFrame</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.DataFrame" title="Link to this definition"></a></dt>
<dd><p>Two dimensional table representation of data.</p>
<p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p>
<p>This constructor is not to be used by the end user.</p>
<p>See <a class="reference internal" href="../context/index.html#datafusion.context.SessionContext" title="datafusion.context.SessionContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">SessionContext</span></code></a> for methods to
create a <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__arrow_c_stream__">
<span class="sig-name descname"><span class="pre">__arrow_c_stream__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">requested_schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">object</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">object</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__" title="Link to this definition"></a></dt>
<dd><p>Export an Arrow PyCapsule Stream.</p>
<p>This will execute and collect the DataFrame. We will attempt to respect the
requested schema, but only trivial transformations will be applied such as only
returning the fields listed in the requested schema if their data types match
those in the DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>requested_schema</strong> – Attempt to provide the DataFrame using this schema.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Arrow PyCapsule object.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__getitem__">
<span class="sig-name descname"><span class="pre">__getitem__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">key</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__getitem__" title="Link to this definition"></a></dt>
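<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the specified column or columns.</p>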
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>key</strong> – Column name or list of column names to select.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the specified column or columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__repr__">
<span class="sig-name descname"><span class="pre">__repr__</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__repr__" title="Link to this definition"></a></dt>
<dd><p>Return a string representation of the DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>String representation of the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame._repr_html_">
<span class="sig-name descname"><span class="pre">_repr_html_</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame._repr_html_" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.aggregate">
<span class="sig-name descname"><span class="pre">aggregate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">group_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">aggs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.aggregate" title="Link to this definition"></a></dt>
<dd><p>Aggregates the rows of the current DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>group_by</strong> – Expression or list of expressions to group by.</p></li>
<li><p><strong>aggs</strong> – Expression or list of expressions to aggregate.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after aggregation.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cache">
<span class="sig-name descname"><span class="pre">cache</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cache" title="Link to this definition"></a></dt>
<dd><p>Cache the DataFrame as a memory table.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Cached DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cast">
<span class="sig-name descname"><span class="pre">cast</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">pyarrow.DataType</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cast" title="Link to this definition"></a></dt>
<dd><p>Cast one or more columns to a different data type.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>mapping</strong> – Mapping with column name as key and target data type as value.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after casting columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect">
<span class="sig-name descname"><span class="pre">collect</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect" title="Link to this definition"></a></dt>
<dd><p>Execute this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and collect results into memory.</p>
<p>Prior to calling <code class="docutils literal notranslate"><span class="pre">collect</span></code>, modifying a DataFrame simply updates a plan
(no actual computation is performed). Calling <code class="docutils literal notranslate"><span class="pre">collect</span></code> triggers the
computation.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> collected from the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect_partitioned">
<span class="sig-name descname"><span class="pre">collect_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect_partitioned" title="Link to this definition"></a></dt>
<dd><p>Execute this DataFrame and collect all partitioned results.</p>
<p>This operation returns <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> objects, maintaining the input
partitioning.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of list of <code class="xref py py-class docutils literal notranslate"><span class="pre">RecordBatch</span></code> collected from the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.count">
<span class="sig-name descname"><span class="pre">count</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.count" title="Link to this definition"></a></dt>
<dd><p>Return the total number of rows in this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>Note that this method will actually run a plan to calculate the
count, which may be slow for large or complicated DataFrames.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Number of rows in the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.default_str_repr">
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">default_str_repr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">batches</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">pyarrow.Schema</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_more</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">table_uuid</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.default_str_repr" title="Link to this definition"></a></dt>
<dd><p>Return the default string representation of a DataFrame.</p>
<p>This method is used by the default formatter and implemented in Rust for
performance reasons.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.describe">
<span class="sig-name descname"><span class="pre">describe</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.describe" title="Link to this definition"></a></dt>
<dd><p>Return the statistics for this DataFrame.</p>
<p>Only numeric datatypes are summarized at the moment; nulls are returned
for non-numeric datatypes.</p>
<p>The output format is modeled after pandas.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>A summary DataFrame containing statistics.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.distinct">
<span class="sig-name descname"><span class="pre">distinct</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.distinct" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with all duplicated rows removed.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>DataFrame after removing duplicates.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.drop">
<span class="sig-name descname"><span class="pre">drop</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.drop" title="Link to this definition"></a></dt>
<dd><p>Drop an arbitrary number of columns.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>columns</strong> – Column names to drop from the dataframe.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with those columns removed in the projection.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.except_all">
<span class="sig-name descname"><span class="pre">except_all</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.except_all" title="Link to this definition"></a></dt>
<dd><p>Calculate the exception of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to calculate exception with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after exception.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream">
<span class="sig-name descname"><span class="pre">execute_stream</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream" title="Link to this definition"></a></dt>
<dd><p>Executes this DataFrame and returns a stream over a single partition.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Record Batch Stream over a single partition.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream_partitioned">
<span class="sig-name descname"><span class="pre">execute_stream_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned" title="Link to this definition"></a></dt>
<dd><p>Executes this DataFrame and returns a stream for each partition.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>One record batch stream per partition.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execution_plan">
<span class="sig-name descname"><span class="pre">execution_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.ExecutionPlan" title="datafusion.plan.ExecutionPlan"><span class="pre">datafusion.plan.ExecutionPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execution_plan" title="Link to this definition"></a></dt>
<dd><p>Return the execution/physical plan.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Execution plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.explain">
<span class="sig-name descname"><span class="pre">explain</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.explain" title="Link to this definition"></a></dt>
<dd><p>Print an explanation of the DataFrame’s plan so far.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">analyze</span></code> is specified, runs the plan and reports metrics.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>verbose</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, more details will be included.</p></li>
<li><p><strong>analyze</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, the plan will be run and its metrics reported.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.fill_null">
<span class="sig-name descname"><span class="pre">fill_null</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">subset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.fill_null" title="Link to this definition"></a></dt>
<dd><p>Fill null values in specified columns with a value.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>value</strong> – Value to replace nulls with. Will be cast to match column type.</p></li>
<li><p><strong>subset</strong> – Optional list of column names to fill. If None, fills all columns.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with null values replaced where type casting is possible.</p>
</dd>
</dl>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># Fill all nulls with 0 where possible</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Fill nulls in specific string columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="s2">&quot;missing&quot;</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;category&quot;</span><span class="p">])</span>
</pre></div>
</div>
<p class="rubric">Notes</p>
<ul class="simple">
<li><p>Only fills nulls in columns where the value can be cast to the column type.</p></li>
<li><p>For columns where casting fails, the original column is kept unchanged.</p></li>
<li><p>For columns not in subset, the original column is kept unchanged.</p></li>
</ul>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.filter">
<span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">predicates</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.filter" title="Link to this definition"></a></dt>
<dd><p>Return a DataFrame containing only the rows for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p>
<p>Rows for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">False</span></code> or <code class="docutils literal notranslate"><span class="pre">None</span></code> are filtered
out. If more than one predicate is provided, these predicates will be
combined as a logical AND. If more complex logic is required, see the
logical operations in <a class="reference internal" href="../functions/index.html#module-datafusion.functions" title="datafusion.functions"><code class="xref py py-mod docutils literal notranslate"><span class="pre">functions</span></code></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>predicates</strong> – Predicate expression(s) to filter the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after filtering.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.head">
<span class="sig-name descname"><span class="pre">head</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.head" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the head of the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.intersect">
<span class="sig-name descname"><span class="pre">intersect</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.intersect" title="Link to this definition"></a></dt>
<dd><p>Calculate the intersection of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to intersect with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after intersection.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.into_view">
<span class="sig-name descname"><span class="pre">into_view</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Table</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.into_view" title="Link to this definition"></a></dt>
<dd><p>Convert this DataFrame into a ViewTable that can be used in register_table.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join" title="Link to this definition"></a></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span 
class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt>
<dd><p>Join this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with another <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>Either <cite>on</cite> has to be provided, or both <cite>left_on</cite> and <cite>right_on</cite> together.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>right</strong> – Other DataFrame to join with.</p></li>
<li><p><strong>on</strong> – Column names to join on in both dataframes.</p></li>
<li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”,
“right”, “full”, “semi”, “anti”.</p></li>
<li><p><strong>left_on</strong> – Join column of the left dataframe.</p></li>
<li><p><strong>right_on</strong> – Join column of the right dataframe.</p></li>
<li><p><strong>join_keys</strong> – Tuple of two lists of column names to join on. [Deprecated]</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after join.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join_on">
<span class="sig-name descname"><span class="pre">join_on</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">on_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em><span 
class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join_on" title="Link to this definition"></a></dt>
<dd><p>Join two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified expressions.</p>
<p>On expressions are used to support inequality predicates. Equality
predicates are correctly optimized.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>right</strong> – Other DataFrame to join with.</p></li>
<li><p><strong>on_exprs</strong> – One or more (in)equality predicates.</p></li>
<li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”,
“right”, “full”, “semi”, “anti”.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after join.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.limit">
<span class="sig-name descname"><span class="pre">limit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.limit" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>count</strong> – Number of rows to limit the DataFrame to.</p></li>
<li><p><strong>offset</strong> – Number of rows to skip.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.logical_plan">
<span class="sig-name descname"><span class="pre">logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.logical_plan" title="Link to this definition"></a></dt>
<dd><p>Return the unoptimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Unoptimized logical plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.optimized_logical_plan">
<span class="sig-name descname"><span class="pre">optimized_logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.optimized_logical_plan" title="Link to this definition"></a></dt>
<dd><p>Return the optimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Optimized logical plan.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition">
<span class="sig-name descname"><span class="pre">repartition</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition" title="Link to this definition"></a></dt>
<dd><p>Repartition a DataFrame into <code class="docutils literal notranslate"><span class="pre">num</span></code> partitions.</p>
<p>Batches are allocated using a round-robin algorithm.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Repartitioned DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition_by_hash">
<span class="sig-name descname"><span class="pre">repartition_by_hash</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition_by_hash" title="Link to this definition"></a></dt>
<dd><p>Repartition a DataFrame using a hash partitioning scheme.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong> – Expressions to evaluate and perform hashing on.</p></li>
<li><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Repartitioned DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.schema">
<span class="sig-name descname"><span class="pre">schema</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Schema</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.schema" title="Link to this definition"></a></dt>
<dd><p>Return the <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.Schema</span></code> of this DataFrame.</p>
<p>The output schema contains information on the name, data type, and
nullability for each column.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Schema describing the DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select" title="Link to this definition"></a></dt>
<dd><p>Project arbitrary expressions into a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>exprs</strong> – Either column names or <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> to select.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after projection. It has one column for each expression.</p>
</dd>
</dl>
<p>Example usage:</p>
<p>The following example will return 3 columns from the original dataframe.
The first two columns will be the original columns <code class="docutils literal notranslate"><span class="pre">a</span></code> and <code class="docutils literal notranslate"><span class="pre">b</span></code> since the
string “a” is assumed to refer to column selection. Also a duplicate of
column <code class="docutils literal notranslate"><span class="pre">a</span></code> will be returned with the column name <code class="docutils literal notranslate"><span class="pre">alternate_a</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;alternate_a&quot;</span><span class="p">))</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select_columns">
<span class="sig-name descname"><span class="pre">select_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select_columns" title="Link to this definition"></a></dt>
<dd><p>Filter the DataFrame by columns.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>DataFrame only containing the specified columns.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.show">
<span class="sig-name descname"><span class="pre">show</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.show" title="Link to this definition"></a></dt>
<dd><p>Execute the DataFrame and print the result to the console.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>num</strong> – Number of lines to show.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.sort">
<span class="sig-name descname"><span class="pre">sort</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.SortExpr" title="datafusion.expr.SortExpr"><span class="pre">datafusion.expr.SortExpr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.sort" title="Link to this definition"></a></dt>
<dd><p>Sort the DataFrame by the specified sorting expressions.</p>
<p>Note that any expression can be turned into a sort expression by
calling its <code class="docutils literal notranslate"><span class="pre">sort</span></code> method.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>exprs</strong> – Sort expressions, applied in order.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after sorting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.tail">
<span class="sig-name descname"><span class="pre">tail</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.tail" title="Link to this definition"></a></dt>
<dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p>
<p>Be aware this could be potentially expensive since the number of rows in
the dataframe needs to be determined. This is done by collecting it.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the tail of the DataFrame.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after limiting.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_arrow_table">
<span class="sig-name descname"><span class="pre">to_arrow_table</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Table</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_arrow_table" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into an Arrow Table.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Arrow Table.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pandas">
<span class="sig-name descname"><span class="pre">to_pandas</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">pandas.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pandas" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Pandas DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Pandas DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_polars">
<span class="sig-name descname"><span class="pre">to_polars</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">polars.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_polars" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Polars DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Polars DataFrame.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pydict">
<span class="sig-name descname"><span class="pre">to_pydict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pydict" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a dictionary of lists.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>Dictionary of lists.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pylist">
<span class="sig-name descname"><span class="pre">to_pylist</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pylist" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a list of dictionaries.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>List of dictionaries.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Callable</span><span class="p"><span class="pre">[</span></span><span class="pre">Ellipsis</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.transform" title="Link to this definition"></a></dt>
<dd><p>Apply a function to the current DataFrame which returns another DataFrame.</p>
<p>This is useful for chaining together multiple functions. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">add_3</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
    <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">with_column</span><span class="p">(</span><span class="s2">&quot;modified&quot;</span><span class="p">,</span> <span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
<span class="k">def</span><span class="w"> </span><span class="nf">within_limit</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
    <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">add_3</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">within_limit</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>func</strong> – A callable function that takes a DataFrame as its first argument</p></li>
<li><p><strong>args</strong> – Zero or more arguments to pass to <cite>func</cite></p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after applying <cite>func</cite> to the original DataFrame.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame">DataFrame</a></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union">
<span class="sig-name descname"><span class="pre">union</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">distinct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union" title="Link to this definition"></a></dt>
<dd><p>Calculate the union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>other</strong> – DataFrame to union with.</p></li>
<li><p><strong>distinct</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, duplicate rows will be removed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after union.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union_distinct">
<span class="sig-name descname"><span class="pre">union_distinct</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union_distinct" title="Link to this definition"></a></dt>
<dd><p>Calculate the distinct union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p>
<p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.
Any duplicate rows are discarded.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>other</strong> – DataFrame to union with.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame after union.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.unnest_columns">
<span class="sig-name descname"><span class="pre">unnest_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">preserve_nulls</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.unnest_columns" title="Link to this definition"></a></dt>
<dd><p>Expand columns of arrays into a single row per array element.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>columns</strong> – Column names to perform unnest operation on.</p></li>
<li><p><strong>preserve_nulls</strong> – If False, rows with null entries will not be
returned.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A DataFrame with the columns expanded.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column">
<span class="sig-name descname"><span class="pre">with_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">expr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column" title="Link to this definition"></a></dt>
<dd><p>Add an additional column to the DataFrame.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>name</strong> – Name of the column to add.</p></li>
<li><p><strong>expr</strong> – Expression to compute the column.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the new column.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column_renamed">
<span class="sig-name descname"><span class="pre">with_column_renamed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">old_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">new_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column_renamed" title="Link to this definition"></a></dt>
<dd><p>Rename one column by applying a new projection.</p>
<p>This is a no-op if the column to be renamed does not exist.</p>
<p>The method supports case-sensitive renaming by wrapping the column name
in one of the following symbols (" or ' or `).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>old_name</strong> – Old column name.</p></li>
<li><p><strong>new_name</strong> – New column name.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the column renamed.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_columns">
<span class="sig-name descname"><span class="pre">with_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">named_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_columns" title="Link to this definition"></a></dt>
<dd><p>Add columns to the DataFrame.</p>
<p>Columns can be added by passing expressions, iterables of expressions, or named
expressions. To pass named expressions use the form name=Expr.</p>
<p>Example usage: The following will add 4 columns labeled a, b, c, and d:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">with_columns</span><span class="p">(</span>
<span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">),</span>
<span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;b&#39;</span><span class="p">),</span> <span class="n">lit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">)],</span>
<span class="n">d</span><span class="o">=</span><span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
<span class="p">)</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>exprs</strong> – Either a single expression or an iterable of expressions to add.</p></li>
<li><p><strong>named_exprs</strong> – Named expressions in the form of <code class="docutils literal notranslate"><span class="pre">name=expr</span></code></p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with the new columns added.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_csv">
<span class="sig-name descname"><span class="pre">write_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">with_header</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_csv" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a CSV file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the CSV file to write.</p></li>
<li><p><strong>with_header</strong> – If true, output the CSV header row.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_json">
<span class="sig-name descname"><span class="pre">write_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_json" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a JSON file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>path</strong> – Path of the JSON file to write.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet" title="Link to this definition"></a></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">Compression.ZSTD</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span></dt>
<dt class="sig sig-object py">
<span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the Parquet file to write.</p></li>
<li><p><strong>compression</strong> – Compression type to use. Default is “ZSTD”.
Available compression types are:</p>
<ul class="simple">
<li><p>“uncompressed”: No compression.</p></li>
<li><p>“snappy”: Snappy compression.</p></li>
<li><p>“gzip”: Gzip compression.</p></li>
<li><p>“brotli”: Brotli compression.</p></li>
<li><p>“lz4”: LZ4 compression.</p></li>
<li><p>“lz4_raw”: LZ4_RAW compression.</p></li>
<li><p>“zstd”: Zstandard compression.</p></li>
</ul>
<p>Note: LZO is not yet implemented in arrow-rs and is therefore excluded.</p></li>
<li><p><strong>compression_level</strong> – Compression level to use. For ZSTD, the
recommended range is 1 to 22, with the default being 4. Higher levels
provide better compression but slower speed.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet_with_options">
<span class="sig-name descname"><span class="pre">write_parquet_with_options</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet_with_options" title="Link to this definition"></a></dt>
<dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p>
<p>Allows advanced writer options to be set with <cite>ParquetWriterOptions</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>path</strong> – Path of the Parquet file to write.</p></li>
<li><p><strong>options</strong> – Sets the writer parquet options (see <cite>ParquetWriterOptions</cite>).</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.df">
<span class="sig-name descname"><span class="pre">df</span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.df" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetColumnOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span 
class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions" title="Link to this definition"></a></dt>
<dd><p>Parquet options for individual columns.</p>
<p>Contains the available options that can be applied for an individual Parquet column,
replacing the global options in <cite>ParquetWriterOptions</cite>.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.encoding">
<span class="sig-name descname"><span class="pre">encoding</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.encoding" title="Link to this definition"></a></dt>
<dd><p>Sets encoding for the column path. Valid values are: <cite>plain</cite>,
<cite>plain_dictionary</cite>, <cite>rle</cite>, <cite>bit_packed</cite>, <cite>delta_binary_packed</cite>,
<cite>delta_length_byte_array</cite>, <cite>delta_byte_array</cite>, <cite>rle_dictionary</cite>, and
<cite>byte_stream_split</cite>. These values are not case-sensitive. If <cite>None</cite>, uses
the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.dictionary_enabled">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled" title="Link to this definition"></a></dt>
<dd><p>Sets if dictionary encoding is enabled for the column path.
If <cite>None</cite>, uses the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.compression">
<span class="sig-name descname"><span class="pre">compression</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.compression" title="Link to this definition"></a></dt>
<dd><p>Sets default parquet compression codec for the column path. Valid
values are <cite>uncompressed</cite>, <cite>snappy</cite>, <cite>gzip(level)</cite>, <cite>lzo</cite>, <cite>brotli(level)</cite>,
<cite>lz4</cite>, <cite>zstd(level)</cite>, and <cite>lz4_raw</cite>. These values are not case-sensitive. If
<cite>None</cite>, uses the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.statistics_enabled">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled" title="Link to this definition"></a></dt>
<dd><p>Sets if statistics are enabled for the column. Valid values
are: <cite>none</cite>, <cite>chunk</cite>, and <cite>page</cite>. These values are not case-sensitive. If
<cite>None</cite>, uses the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled">
<span class="sig-name descname"><span class="pre">bloom_filter_enabled</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled" title="Link to this definition"></a></dt>
<dd><p>Sets if bloom filter is enabled for the column path. If
<cite>None</cite>, uses the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp" title="Link to this definition"></a></dt>
<dd><p>Sets bloom filter false positive probability for the column
path. If <cite>None</cite>, uses the default parquet options.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv" title="Link to this definition"></a></dt>
<dd><p>Sets bloom filter number of distinct values. If <cite>None</cite>, uses
the default parquet options.</p>
</dd></dl>
<p>Initialize the ParquetColumnOptions.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="id0">
<span class="sig-name descname"><span class="pre">bloom_filter_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id0" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id1">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id1" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id2">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id2" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id3">
<span class="sig-name descname"><span class="pre">compression</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id3" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id4">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id4" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id5">
<span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id5" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id6">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id6" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetWriterOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data_pagesize_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">writer_version</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'1.0'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">skip_arrow_metadata</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'zstd(3)'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_page_size_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'page'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_row_group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">created_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'datafusion-python'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_index_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span 
class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_page_row_count_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_on_write</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">allow_single_file_parallelism</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_parallel_row_group_writers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_specific_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span 
class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><span class="pre">ParquetColumnOptions</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions" title="Link to this definition"></a></dt>
<dd><p>Advanced parquet writer options.</p>
<p>Allows setting the writer options that apply to the entire file. Some options can
also be set on a column by column basis, with the field <cite>column_specific_options</cite>
(see <cite>ParquetColumnOptions</cite>).</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit">
<span class="sig-name descname"><span class="pre">data_pagesize_limit</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit" title="Link to this definition"></a></dt>
<dd><p>Sets best effort maximum size of data page in bytes.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.write_batch_size">
<span class="sig-name descname"><span class="pre">write_batch_size</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size" title="Link to this definition"></a></dt>
<dd><p>Sets write_batch_size in bytes.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.writer_version">
<span class="sig-name descname"><span class="pre">writer_version</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.writer_version" title="Link to this definition"></a></dt>
<dd><p>Sets parquet writer version. Valid values are <cite>1.0</cite> and
<cite>2.0</cite>.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata">
<span class="sig-name descname"><span class="pre">skip_arrow_metadata</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata" title="Link to this definition"></a></dt>
<dd><p>Skip encoding the embedded arrow metadata in the
KV_meta.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.compression">
<span class="sig-name descname"><span class="pre">compression</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.compression" title="Link to this definition"></a></dt>
<dd><p>Compression type to use. Default is “zstd(3)”.
Available compression types are:</p>
<ul class="simple">
<li><p>“uncompressed”: No compression.</p></li>
<li><p>“snappy”: Snappy compression.</p></li>
<li><p>“gzip(n)”: Gzip compression with level n.</p></li>
<li><p>“brotli(n)”: Brotli compression with level n.</p></li>
<li><p>“lz4”: LZ4 compression.</p></li>
<li><p>“lz4_raw”: LZ4_RAW compression.</p></li>
<li><p>“zstd(n)”: Zstandard compression with level n.</p></li>
</ul>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_enabled">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled" title="Link to this definition"></a></dt>
<dd><p>Sets if dictionary encoding is enabled. If None, uses
the default parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit">
<span class="sig-name descname"><span class="pre">dictionary_page_size_limit</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit" title="Link to this definition"></a></dt>
<dd><p>Sets best effort maximum dictionary page size,
in bytes.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_enabled">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled" title="Link to this definition"></a></dt>
<dd><p>Sets if statistics are enabled for any column. Valid
values are <cite>none</cite>, <cite>chunk</cite>, and <cite>page</cite>. If None, uses the default
parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.max_row_group_size">
<span class="sig-name descname"><span class="pre">max_row_group_size</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size" title="Link to this definition"></a></dt>
<dd><p>Target maximum number of rows in each row group
(defaults to 1M rows). Writing larger row groups requires more memory to
write, but can get better compression and be faster to read.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.created_by">
<span class="sig-name descname"><span class="pre">created_by</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.created_by" title="Link to this definition"></a></dt>
<dd><p>Sets “created by” property.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length">
<span class="sig-name descname"><span class="pre">column_index_truncate_length</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length" title="Link to this definition"></a></dt>
<dd><p>Sets column index truncate length.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length">
<span class="sig-name descname"><span class="pre">statistics_truncate_length</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length" title="Link to this definition"></a></dt>
<dd><p>Sets statistics truncate length. If None, uses
the default parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit">
<span class="sig-name descname"><span class="pre">data_page_row_count_limit</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit" title="Link to this definition"></a></dt>
<dd><p>Sets best effort maximum number of rows in a data
page.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.encoding">
<span class="sig-name descname"><span class="pre">encoding</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.encoding" title="Link to this definition"></a></dt>
<dd><p>Sets default encoding for any column. Valid values are <cite>plain</cite>,
<cite>plain_dictionary</cite>, <cite>rle</cite>, <cite>bit_packed</cite>, <cite>delta_binary_packed</cite>,
<cite>delta_length_byte_array</cite>, <cite>delta_byte_array</cite>, <cite>rle_dictionary</cite>, and
<cite>byte_stream_split</cite>. If None, uses the default parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write">
<span class="sig-name descname"><span class="pre">bloom_filter_on_write</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write" title="Link to this definition"></a></dt>
<dd><p>Write bloom filters for all columns when creating
parquet files.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp" title="Link to this definition"></a></dt>
<dd><p>Sets bloom filter false positive probability. If None,
uses the default parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv" title="Link to this definition"></a></dt>
<dd><p>Sets bloom filter number of distinct values. If None, uses
the default parquet writer setting.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism">
<span class="sig-name descname"><span class="pre">allow_single_file_parallelism</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism" title="Link to this definition"></a></dt>
<dd><p>Controls whether DataFusion will attempt to
speed up writing parquet files by serializing them in parallel. Each
column in each row group in each output file is serialized in parallel
leveraging a maximum possible core count of n_files * n_row_groups *
n_columns.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers">
<span class="sig-name descname"><span class="pre">maximum_parallel_row_group_writers</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers" title="Link to this definition"></a></dt>
<dd><p>By default, the parallel parquet writer is
tuned for minimum memory usage in a streaming execution plan. You may
see a performance benefit when writing large parquet files by increasing
<cite>maximum_parallel_row_group_writers</cite> and
<cite>maximum_buffered_record_batches_per_stream</cite> if your system has idle
cores and can tolerate additional memory usage. Boosting these values is
likely worthwhile when writing out already in-memory data, such as from
a cached data frame.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream">
<span class="sig-name descname"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream" title="Link to this definition"></a></dt>
<dd><p>See
<cite>maximum_parallel_row_group_writers</cite>.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_specific_options">
<span class="sig-name descname"><span class="pre">column_specific_options</span></span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options" title="Link to this definition"></a></dt>
<dd><p>Overrides options for specific columns. If a column
is not a part of this dictionary, it will use the file-level parameters provided in these writer options.</p>
</dd></dl>
<p>Initialize the ParquetWriterOptions.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="id7">
<span class="sig-name descname"><span class="pre">allow_single_file_parallelism</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#id7" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id8">
<span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id8" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id9">
<span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id9" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id10">
<span class="sig-name descname"><span class="pre">bloom_filter_on_write</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#id10" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id11">
<span class="sig-name descname"><span class="pre">column_index_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">64</span></em><a class="headerlink" href="#id11" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id12">
<span class="sig-name descname"><span class="pre">column_specific_options</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id12" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id13">
<span class="sig-name descname"><span class="pre">created_by</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'datafusion-python'</span></em><a class="headerlink" href="#id13" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id14">
<span class="sig-name descname"><span class="pre">data_page_row_count_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">20000</span></em><a class="headerlink" href="#id14" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id15">
<span class="sig-name descname"><span class="pre">data_pagesize_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#id15" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id16">
<span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#id16" title="Link to this definition"></a></dt>
<dd></dd></dl>
<!-- Attribute entries rendered as Sphinx "py attribute" definition lists.
     Each <dt> shows the attribute name, an "=" sign and the displayed value,
     followed by a permalink anchor that points at the entry's own id.
     Every <dd> is empty, so no description text is rendered for these entries.
     NOTE(review): the numeric ids (id17 to id26) look auto-generated, presumably
     because the same attribute names are documented elsewhere on this page;
     confirm before treating them as stable link targets. -->
<dl class="py attribute">
<dt class="sig sig-object py" id="id17">
<span class="sig-name descname"><span class="pre">dictionary_page_size_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#id17" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id18">
<span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id18" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id19">
<span class="sig-name descname"><span class="pre">max_row_group_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#id19" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id20">
<span class="sig-name descname"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#id20" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id21">
<span class="sig-name descname"><span class="pre">maximum_parallel_row_group_writers</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#id21" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id22">
<span class="sig-name descname"><span class="pre">skip_arrow_metadata</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#id22" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id23">
<span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'page'</span></em><a class="headerlink" href="#id23" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id24">
<span class="sig-name descname"><span class="pre">statistics_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id24" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id25">
<span class="sig-name descname"><span class="pre">write_batch_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1024</span></em><a class="headerlink" href="#id25" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id26">
<span class="sig-name descname"><span class="pre">writer_version</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'1.0'</span></em><a class="headerlink" href="#id26" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
</section>
</div>
<!-- Previous / next page links.
     Each link's visible text ("previous" / "next" plus the page title) already
     names its destination, so the angle icons are purely decorative and are
     hidden from assistive technology with aria-hidden="true". -->
<div class="prev-next-area">
  <a class="left-prev" id="prev-link" href="../context/index.html" title="previous page">
    <i class="fas fa-angle-left" aria-hidden="true"></i>
    <div class="prev-next-info">
      <p class="prev-next-subtitle">previous</p>
      <p class="prev-next-title">datafusion.context</p>
    </div>
  </a>
  <a class="right-next" id="next-link" href="../dataframe_formatter/index.html" title="next page">
    <div class="prev-next-info">
      <p class="prev-next-subtitle">next</p>
      <p class="prev-next-title">datafusion.dataframe_formatter</p>
    </div>
    <i class="fas fa-angle-right" aria-hidden="true"></i>
  </a>
</div>
</main>
</div>
</div>
<!-- Theme script bundle, loaded after the main page content.
     The same URL (including the digest query string) is declared in <head> as
     <link rel="preload" as="script">; keep the two in sync when the digest changes. -->
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<!-- Based on pydata_sphinx_theme/footer.html -->
<!-- Page footer with three items: copyright notice, Sphinx version credit,
     and the trademark notice. The Sphinx credit links to the project site
     over HTTPS. -->
<footer class="footer mt-5 mt-md-0">
  <div class="container">
    <div class="footer-item">
      <p class="copyright">
        &copy; Copyright 2019-2024, Apache Software Foundation.
      </p>
    </div>
    <div class="footer-item">
      <p class="sphinx-version">
        Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.1.3.
      </p>
    </div>
    <div class="footer-item">
      <p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo</p>
      <p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
    </div>
  </div>
</footer>
</body>
</html>