<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>pyspark.sql.DataFrame &#8212; PySpark 3.4.3 documentation</title>
<link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="pyspark.sql.Column" href="pyspark.sql.Column.html" />
<link rel="prev" title="pyspark.sql.Catalog" href="pyspark.sql.Catalog.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../../index.html">Overview</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../user_guide/index.html">User Guides</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../migration_guide/index.html">Migration Guides</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="active">
<a href="../index.html">Spark SQL</a>
<ul>
<li class="active">
<a href="../core_classes.html">Core Classes</a>
</li>
<li class="">
<a href="../spark_session.html">Spark Session</a>
</li>
<li class="">
<a href="../configuration.html">Configuration</a>
</li>
<li class="">
<a href="../io.html">Input/Output</a>
</li>
<li class="">
<a href="../dataframe.html">DataFrame</a>
</li>
<li class="">
<a href="../column.html">Column</a>
</li>
<li class="">
<a href="../data_types.html">Data Types</a>
</li>
<li class="">
<a href="../row.html">Row</a>
</li>
<li class="">
<a href="../functions.html">Functions</a>
</li>
<li class="">
<a href="../window.html">Window</a>
</li>
<li class="">
<a href="../grouping.html">Grouping</a>
</li>
<li class="">
<a href="../catalog.html">Catalog</a>
</li>
<li class="">
<a href="../avro.html">Avro</a>
</li>
<li class="">
<a href="../observation.html">Observation</a>
</li>
<li class="">
<a href="../udf.html">UDF</a>
</li>
<li class="">
<a href="../protobuf.html">Protobuf</a>
</li>
</ul>
</li>
<li class="">
<a href="../../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="../../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../../pyspark.streaming.html">Spark Streaming (Legacy)</a>
</li>
<li class="">
<a href="../../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../../pyspark.resource.html">Resource Management</a>
</li>
<li class="">
<a href="../../pyspark.errors.html">Errors</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="pyspark-sql-dataframe">
<h1>pyspark.sql.DataFrame<a class="headerlink" href="#pyspark-sql-dataframe" title="Permalink to this headline"></a></h1>
<dl class="py class">
<dt id="pyspark.sql.DataFrame">
<em class="property">class </em><code class="sig-prename descclassname">pyspark.sql.</code><code class="sig-name descname">DataFrame</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">jdf</span><span class="p">:</span> <span class="n">py4j.java_gateway.JavaObject</span></em>, <em class="sig-param"><span class="n">sql_ctx</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>SQLContext<span class="p">, </span>SparkSession<span class="p">]</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../../../_modules/pyspark/sql/dataframe.html#DataFrame"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.DataFrame" title="Permalink to this definition"></a></dt>
<dd><p>A distributed collection of data grouped into named columns.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.3.0.</span></p>
</div>
<div class="versionchanged">
<p><span class="versionmodified changed">Changed in version 3.4.0: </span>Supports Spark Connect.</p>
</div>
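<p>Since 3.4, the same DataFrame API can also run against a Spark Connect server. A minimal
sketch, assuming a Spark Connect server is reachable at the address below (the host and port
are placeholders):</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; from pyspark.sql import SparkSession
&gt;&gt;&gt; # Connect to a (hypothetical) Spark Connect endpoint instead of a local JVM
&gt;&gt;&gt; spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
&gt;&gt;&gt; df = spark.range(3)  # DataFrames built this way use the same API shown below
</pre></div>
</div>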
<p class="rubric">Notes</p>
<p>A DataFrame should only be created as shown in the examples below. It should not
be created directly via the constructor.</p>
<p class="rubric">Examples</p>
<p>A <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> is equivalent to a relational table in Spark SQL,
and can be created using various functions in <a class="reference internal" href="pyspark.sql.SparkSession.html#pyspark.sql.SparkSession" title="pyspark.sql.SparkSession"><code class="xref py py-class docutils literal notranslate"><span class="pre">SparkSession</span></code></a>:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">people</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;deptId&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">:</span> <span class="mi">40</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;Hyukjin Kwon&quot;</span><span class="p">,</span> <span class="s2">&quot;gender&quot;</span><span class="p">:</span> <span class="s2">&quot;M&quot;</span><span class="p">,</span> <span class="s2">&quot;salary&quot;</span><span class="p">:</span> <span class="mi">50</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;deptId&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">:</span> <span class="mi">50</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;Takuya Ueshin&quot;</span><span class="p">,</span> <span class="s2">&quot;gender&quot;</span><span class="p">:</span> <span class="s2">&quot;M&quot;</span><span class="p">,</span> <span class="s2">&quot;salary&quot;</span><span class="p">:</span> <span class="mi">100</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;deptId&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">:</span> <span class="mi">60</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;Xinrong Meng&quot;</span><span class="p">,</span> <span class="s2">&quot;gender&quot;</span><span class="p">:</span> <span class="s2">&quot;F&quot;</span><span class="p">,</span> <span class="s2">&quot;salary&quot;</span><span class="p">:</span> <span class="mi">150</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;deptId&quot;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;Haejoon Lee&quot;</span><span class="p">,</span> <span class="s2">&quot;gender&quot;</span><span class="p">:</span> <span class="s2">&quot;M&quot;</span><span class="p">,</span> <span class="s2">&quot;salary&quot;</span><span class="p">:</span> <span class="mi">200</span><span class="p">}</span>
<span class="gp">... </span><span class="p">])</span>
</pre></div>
</div>
<p>Once created, it can be manipulated using the various domain-specific-language
(DSL) functions defined in <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and <a class="reference internal" href="pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a>.</p>
<p>To select a column from the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>, access it as an attribute:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">age_col</span> <span class="o">=</span> <span class="n">people</span><span class="o">.</span><span class="n">age</span>
</pre></div>
</div>
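<p>Equivalently, a column can be referenced with bracket notation or with the
<code class="docutils literal notranslate"><span class="pre">col()</span></code> function
(a brief sketch of the alternatives):</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; from pyspark.sql.functions import col
&gt;&gt;&gt; age_col = people["age"]  # bracket notation; same Column as people.age
&gt;&gt;&gt; age_col = col("age")     # a free-standing column reference, resolved when used
</pre></div>
</div>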
<p>A more concrete example:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="c1"># To create DataFrame using SparkSession</span>
<span class="gp">... </span><span class="n">department</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;PySpark&quot;</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;ML&quot;</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s2">&quot;id&quot;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;Spark SQL&quot;</span><span class="p">}</span>
<span class="gp">... </span><span class="p">])</span>
</pre></div>
</div>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">people</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">people</span><span class="o">.</span><span class="n">age</span> <span class="o">&gt;</span> <span class="mi">30</span><span class="p">)</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">department</span><span class="p">,</span> <span class="n">people</span><span class="o">.</span><span class="n">deptId</span> <span class="o">==</span> <span class="n">department</span><span class="o">.</span><span class="n">id</span><span class="p">)</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">department</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s2">&quot;gender&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s2">&quot;salary&quot;</span><span class="p">:</span> <span class="s2">&quot;avg&quot;</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">:</span> <span class="s2">&quot;max&quot;</span><span class="p">})</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
<span class="go">+-------+------+-----------+--------+</span>
<span class="go">| name|gender|avg(salary)|max(age)|</span>
<span class="go">+-------+------+-----------+--------+</span>
<span class="go">| ML| F| 150.0| 60|</span>
<span class="go">|PySpark| M| 75.0| 50|</span>
<span class="go">+-------+------+-----------+--------+</span>
</pre></div>
</div>
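<p>Note that DataFrame transformations such as
<code class="docutils literal notranslate"><span class="pre">filter()</span></code> and
<code class="docutils literal notranslate"><span class="pre">select()</span></code> are lazy;
an action such as <code class="docutils literal notranslate"><span class="pre">count()</span></code>
triggers the actual computation. A minimal sketch using the <cite>people</cite> DataFrame above:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; adults = people.filter(people.age &gt; 21).select("name", "deptId")  # lazy, no job runs yet
&gt;&gt;&gt; adults.count()  # an action: computes and returns the number of rows
3
</pre></div>
</div>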
<p class="rubric">Methods</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.agg.html#pyspark.sql.DataFrame.agg" title="pyspark.sql.DataFrame.agg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">agg</span></code></a>(*exprs)</p></td>
<td><p>Aggregate on the entire <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> without groups (shorthand for <code class="docutils literal notranslate"><span class="pre">df.groupBy().agg()</span></code>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.alias.html#pyspark.sql.DataFrame.alias" title="pyspark.sql.DataFrame.alias"><code class="xref py py-obj docutils literal notranslate"><span class="pre">alias</span></code></a>(alias)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with an alias set.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.approxQuantile.html#pyspark.sql.DataFrame.approxQuantile" title="pyspark.sql.DataFrame.approxQuantile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">approxQuantile</span></code></a>(col, probabilities, relativeError)</p></td>
<td><p>Calculates the approximate quantiles of numerical columns of a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.cache.html#pyspark.sql.DataFrame.cache" title="pyspark.sql.DataFrame.cache"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cache</span></code></a>()</p></td>
<td><p>Persists the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the default storage level (<cite>MEMORY_AND_DISK_DESER</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.checkpoint.html#pyspark.sql.DataFrame.checkpoint" title="pyspark.sql.DataFrame.checkpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">checkpoint</span></code></a>([eager])</p></td>
<td><p>Returns a checkpointed version of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.coalesce.html#pyspark.sql.DataFrame.coalesce" title="pyspark.sql.DataFrame.coalesce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">coalesce</span></code></a>(numPartitions)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> that has exactly <cite>numPartitions</cite> partitions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.colRegex.html#pyspark.sql.DataFrame.colRegex" title="pyspark.sql.DataFrame.colRegex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">colRegex</span></code></a>(colName)</p></td>
<td><p>Selects column based on the column name specified as a regex and returns it as <a class="reference internal" href="pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.collect.html#pyspark.sql.DataFrame.collect" title="pyspark.sql.DataFrame.collect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">collect</span></code></a>()</p></td>
<td><p>Returns all the records as a list of <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.corr.html#pyspark.sql.DataFrame.corr" title="pyspark.sql.DataFrame.corr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">corr</span></code></a>(col1, col2[, method])</p></td>
<td><p>Calculates the correlation of two columns of a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a double value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.count.html#pyspark.sql.DataFrame.count" title="pyspark.sql.DataFrame.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">count</span></code></a>()</p></td>
<td><p>Returns the number of rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.cov.html#pyspark.sql.DataFrame.cov" title="pyspark.sql.DataFrame.cov"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cov</span></code></a>(col1, col2)</p></td>
<td><p>Calculates the sample covariance for the given columns, specified by their names, as a double value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.createGlobalTempView.html#pyspark.sql.DataFrame.createGlobalTempView" title="pyspark.sql.DataFrame.createGlobalTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">createGlobalTempView</span></code></a>(name)</p></td>
<td><p>Creates a global temporary view with this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.createOrReplaceGlobalTempView.html#pyspark.sql.DataFrame.createOrReplaceGlobalTempView" title="pyspark.sql.DataFrame.createOrReplaceGlobalTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">createOrReplaceGlobalTempView</span></code></a>(name)</p></td>
<td><p>Creates or replaces a global temporary view using the given name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.createOrReplaceTempView.html#pyspark.sql.DataFrame.createOrReplaceTempView" title="pyspark.sql.DataFrame.createOrReplaceTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">createOrReplaceTempView</span></code></a>(name)</p></td>
<td><p>Creates or replaces a local temporary view with this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.createTempView.html#pyspark.sql.DataFrame.createTempView" title="pyspark.sql.DataFrame.createTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">createTempView</span></code></a>(name)</p></td>
<td><p>Creates a local temporary view with this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.crossJoin.html#pyspark.sql.DataFrame.crossJoin" title="pyspark.sql.DataFrame.crossJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">crossJoin</span></code></a>(other)</p></td>
<td><p>Returns the Cartesian product with another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.crosstab.html#pyspark.sql.DataFrame.crosstab" title="pyspark.sql.DataFrame.crosstab"><code class="xref py py-obj docutils literal notranslate"><span class="pre">crosstab</span></code></a>(col1, col2)</p></td>
<td><p>Computes a pair-wise frequency table of the given columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.cube.html#pyspark.sql.DataFrame.cube" title="pyspark.sql.DataFrame.cube"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cube</span></code></a>(*cols)</p></td>
<td><p>Creates a multi-dimensional cube for the current <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregations on them.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.describe.html#pyspark.sql.DataFrame.describe" title="pyspark.sql.DataFrame.describe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">describe</span></code></a>(*cols)</p></td>
<td><p>Computes basic statistics for numeric and string columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.distinct.html#pyspark.sql.DataFrame.distinct" title="pyspark.sql.DataFrame.distinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">distinct</span></code></a>()</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing the distinct rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.drop.html#pyspark.sql.DataFrame.drop" title="pyspark.sql.DataFrame.drop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">drop</span></code></a>(*cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> without specified columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.dropDuplicates.html#pyspark.sql.DataFrame.dropDuplicates" title="pyspark.sql.DataFrame.dropDuplicates"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dropDuplicates</span></code></a>([subset])</p></td>
<td><p>Return a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with duplicate rows removed, optionally only considering certain columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.drop_duplicates.html#pyspark.sql.DataFrame.drop_duplicates" title="pyspark.sql.DataFrame.drop_duplicates"><code class="xref py py-obj docutils literal notranslate"><span class="pre">drop_duplicates</span></code></a>([subset])</p></td>
<td><p><a class="reference internal" href="pyspark.sql.DataFrame.drop_duplicates.html#pyspark.sql.DataFrame.drop_duplicates" title="pyspark.sql.DataFrame.drop_duplicates"><code class="xref py py-func docutils literal notranslate"><span class="pre">drop_duplicates()</span></code></a> is an alias for <a class="reference internal" href="pyspark.sql.DataFrame.dropDuplicates.html#pyspark.sql.DataFrame.dropDuplicates" title="pyspark.sql.DataFrame.dropDuplicates"><code class="xref py py-func docutils literal notranslate"><span class="pre">dropDuplicates()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.dropna.html#pyspark.sql.DataFrame.dropna" title="pyspark.sql.DataFrame.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dropna</span></code></a>([how, thresh, subset])</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> omitting rows with null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.exceptAll.html#pyspark.sql.DataFrame.exceptAll" title="pyspark.sql.DataFrame.exceptAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">exceptAll</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> but not in another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> while preserving duplicates.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.explain.html#pyspark.sql.DataFrame.explain" title="pyspark.sql.DataFrame.explain"><code class="xref py py-obj docutils literal notranslate"><span class="pre">explain</span></code></a>([extended, mode])</p></td>
<td><p>Prints the (logical and physical) plans to the console for debugging purposes.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.fillna.html#pyspark.sql.DataFrame.fillna" title="pyspark.sql.DataFrame.fillna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">fillna</span></code></a>(value[, subset])</p></td>
<td><p>Replace null values, alias for <code class="docutils literal notranslate"><span class="pre">na.fill()</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.filter.html#pyspark.sql.DataFrame.filter" title="pyspark.sql.DataFrame.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">filter</span></code></a>(condition)</p></td>
<td><p>Filters rows using the given condition.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.first.html#pyspark.sql.DataFrame.first" title="pyspark.sql.DataFrame.first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">first</span></code></a>()</p></td>
<td><p>Returns the first row as a <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.foreach.html#pyspark.sql.DataFrame.foreach" title="pyspark.sql.DataFrame.foreach"><code class="xref py py-obj docutils literal notranslate"><span class="pre">foreach</span></code></a>(f)</p></td>
<td><p>Applies the <code class="docutils literal notranslate"><span class="pre">f</span></code> function to each <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a> of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.foreachPartition.html#pyspark.sql.DataFrame.foreachPartition" title="pyspark.sql.DataFrame.foreachPartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">foreachPartition</span></code></a>(f)</p></td>
<td><p>Applies the <code class="docutils literal notranslate"><span class="pre">f</span></code> function to each partition of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.freqItems.html#pyspark.sql.DataFrame.freqItems" title="pyspark.sql.DataFrame.freqItems"><code class="xref py py-obj docutils literal notranslate"><span class="pre">freqItems</span></code></a>(cols[, support])</p></td>
<td><p>Finds frequent items for columns, possibly with false positives.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.groupBy.html#pyspark.sql.DataFrame.groupBy" title="pyspark.sql.DataFrame.groupBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">groupBy</span></code></a>(*cols)</p></td>
<td><p>Groups the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregation on them.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">groupby</span></code>(*cols)</p></td>
<td><p><code class="xref py py-func docutils literal notranslate"><span class="pre">groupby()</span></code> is an alias for <a class="reference internal" href="pyspark.sql.DataFrame.groupBy.html#pyspark.sql.DataFrame.groupBy" title="pyspark.sql.DataFrame.groupBy"><code class="xref py py-func docutils literal notranslate"><span class="pre">groupBy()</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head" title="pyspark.sql.DataFrame.head"><code class="xref py py-obj docutils literal notranslate"><span class="pre">head</span></code></a>([n])</p></td>
<td><p>Returns the first <code class="docutils literal notranslate"><span class="pre">n</span></code> rows.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.hint.html#pyspark.sql.DataFrame.hint" title="pyspark.sql.DataFrame.hint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hint</span></code></a>(name, *parameters)</p></td>
<td><p>Specifies some hint on the current <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.inputFiles.html#pyspark.sql.DataFrame.inputFiles" title="pyspark.sql.DataFrame.inputFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">inputFiles</span></code></a>()</p></td>
<td><p>Returns a best-effort snapshot of the files that compose this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.intersect.html#pyspark.sql.DataFrame.intersect" title="pyspark.sql.DataFrame.intersect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">intersect</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows only in both this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.intersectAll.html#pyspark.sql.DataFrame.intersectAll" title="pyspark.sql.DataFrame.intersectAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">intersectAll</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in both this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> while preserving duplicates.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.isEmpty.html#pyspark.sql.DataFrame.isEmpty" title="pyspark.sql.DataFrame.isEmpty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isEmpty</span></code></a>()</p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> is empty.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.isLocal.html#pyspark.sql.DataFrame.isLocal" title="pyspark.sql.DataFrame.isLocal"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isLocal</span></code></a>()</p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if the <a class="reference internal" href="pyspark.sql.DataFrame.collect.html#pyspark.sql.DataFrame.collect" title="pyspark.sql.DataFrame.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">collect()</span></code></a> and <a class="reference internal" href="pyspark.sql.DataFrame.take.html#pyspark.sql.DataFrame.take" title="pyspark.sql.DataFrame.take"><code class="xref py py-func docutils literal notranslate"><span class="pre">take()</span></code></a> methods can be run locally (without any Spark executors).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.join.html#pyspark.sql.DataFrame.join" title="pyspark.sql.DataFrame.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">join</span></code></a>(other[, on, how])</p></td>
<td><p>Joins with another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>, using the given join expression.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.limit.html#pyspark.sql.DataFrame.limit" title="pyspark.sql.DataFrame.limit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">limit</span></code></a>(num)</p></td>
<td><p>Limits the result count to the number specified.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.localCheckpoint.html#pyspark.sql.DataFrame.localCheckpoint" title="pyspark.sql.DataFrame.localCheckpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">localCheckpoint</span></code></a>([eager])</p></td>
<td><p>Returns a locally checkpointed version of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.mapInArrow.html#pyspark.sql.DataFrame.mapInArrow" title="pyspark.sql.DataFrame.mapInArrow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mapInArrow</span></code></a>(func, schema)</p></td>
<td><p>Maps an iterator of batches in the current <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using a Python native function that takes and outputs a PyArrow <cite>RecordBatch</cite>, and returns the result as a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.mapInPandas.html#pyspark.sql.DataFrame.mapInPandas" title="pyspark.sql.DataFrame.mapInPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mapInPandas</span></code></a>(func, schema)</p></td>
<td><p>Maps an iterator of batches in the current <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using a Python native function that takes and outputs a pandas DataFrame, and returns the result as a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.melt.html#pyspark.sql.DataFrame.melt" title="pyspark.sql.DataFrame.melt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">melt</span></code></a>(ids, values, variableColumnName, …)</p></td>
<td><p>Unpivots a DataFrame from wide format to long format, optionally leaving identifier columns set.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.observe.html#pyspark.sql.DataFrame.observe" title="pyspark.sql.DataFrame.observe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">observe</span></code></a>(observation, *exprs)</p></td>
<td><p>Defines (named) metrics to observe on the DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.orderBy.html#pyspark.sql.DataFrame.orderBy" title="pyspark.sql.DataFrame.orderBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">orderBy</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> sorted by the specified column(s).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.pandas_api.html#pyspark.sql.DataFrame.pandas_api" title="pyspark.sql.DataFrame.pandas_api"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pandas_api</span></code></a>([index_col])</p></td>
<td><p>Converts the existing DataFrame into a pandas-on-Spark DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.persist.html#pyspark.sql.DataFrame.persist" title="pyspark.sql.DataFrame.persist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">persist</span></code></a>([storageLevel])</p></td>
<td><p>Sets the storage level to persist the contents of the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> across operations after the first time it is computed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.printSchema.html#pyspark.sql.DataFrame.printSchema" title="pyspark.sql.DataFrame.printSchema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">printSchema</span></code></a>()</p></td>
<td><p>Prints out the schema in the tree format.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.randomSplit.html#pyspark.sql.DataFrame.randomSplit" title="pyspark.sql.DataFrame.randomSplit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">randomSplit</span></code></a>(weights[, seed])</p></td>
<td><p>Randomly splits this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the provided weights.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.registerTempTable.html#pyspark.sql.DataFrame.registerTempTable" title="pyspark.sql.DataFrame.registerTempTable"><code class="xref py py-obj docutils literal notranslate"><span class="pre">registerTempTable</span></code></a>(name)</p></td>
<td><p>Registers this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a temporary table using the given name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.repartition.html#pyspark.sql.DataFrame.repartition" title="pyspark.sql.DataFrame.repartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">repartition</span></code></a>(numPartitions, *cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> partitioned by the given partitioning expressions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.repartitionByRange.html#pyspark.sql.DataFrame.repartitionByRange" title="pyspark.sql.DataFrame.repartitionByRange"><code class="xref py py-obj docutils literal notranslate"><span class="pre">repartitionByRange</span></code></a>(numPartitions, *cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> partitioned by the given partitioning expressions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.replace.html#pyspark.sql.DataFrame.replace" title="pyspark.sql.DataFrame.replace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">replace</span></code></a>(to_replace[, value, subset])</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> replacing a value with another value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.rollup.html#pyspark.sql.DataFrame.rollup" title="pyspark.sql.DataFrame.rollup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rollup</span></code></a>(*cols)</p></td>
<td><p>Creates a multi-dimensional rollup for the current <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregation on them.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sameSemantics.html#pyspark.sql.DataFrame.sameSemantics" title="pyspark.sql.DataFrame.sameSemantics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sameSemantics</span></code></a>(other)</p></td>
<td><p>Returns <cite>True</cite> when the logical query plans inside both <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>s are equal and therefore return the same results.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sample.html#pyspark.sql.DataFrame.sample" title="pyspark.sql.DataFrame.sample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sample</span></code></a>([withReplacement, fraction, seed])</p></td>
<td><p>Returns a sampled subset of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sampleBy.html#pyspark.sql.DataFrame.sampleBy" title="pyspark.sql.DataFrame.sampleBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sampleBy</span></code></a>(col, fractions[, seed])</p></td>
<td><p>Returns a stratified sample without replacement based on the fraction given on each stratum.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.select.html#pyspark.sql.DataFrame.select" title="pyspark.sql.DataFrame.select"><code class="xref py py-obj docutils literal notranslate"><span class="pre">select</span></code></a>(*cols)</p></td>
<td><p>Projects a set of expressions and returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.selectExpr.html#pyspark.sql.DataFrame.selectExpr" title="pyspark.sql.DataFrame.selectExpr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">selectExpr</span></code></a>(*expr)</p></td>
<td><p>Projects a set of SQL expressions and returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.semanticHash.html#pyspark.sql.DataFrame.semanticHash" title="pyspark.sql.DataFrame.semanticHash"><code class="xref py py-obj docutils literal notranslate"><span class="pre">semanticHash</span></code></a>()</p></td>
<td><p>Returns a hash code of the logical query plan against this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.show.html#pyspark.sql.DataFrame.show" title="pyspark.sql.DataFrame.show"><code class="xref py py-obj docutils literal notranslate"><span class="pre">show</span></code></a>([n, truncate, vertical])</p></td>
<td><p>Prints the first <code class="docutils literal notranslate"><span class="pre">n</span></code> rows to the console.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sort.html#pyspark.sql.DataFrame.sort" title="pyspark.sql.DataFrame.sort"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sort</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> sorted by the specified column(s).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sortWithinPartitions.html#pyspark.sql.DataFrame.sortWithinPartitions" title="pyspark.sql.DataFrame.sortWithinPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sortWithinPartitions</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with each partition sorted by the specified column(s).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.subtract.html#pyspark.sql.DataFrame.subtract" title="pyspark.sql.DataFrame.subtract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">subtract</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> but not in another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.summary.html#pyspark.sql.DataFrame.summary" title="pyspark.sql.DataFrame.summary"><code class="xref py py-obj docutils literal notranslate"><span class="pre">summary</span></code></a>(*statistics)</p></td>
<td><p>Computes specified statistics for numeric and string columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.tail.html#pyspark.sql.DataFrame.tail" title="pyspark.sql.DataFrame.tail"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tail</span></code></a>(num)</p></td>
<td><p>Returns the last <code class="docutils literal notranslate"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code> of <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.take.html#pyspark.sql.DataFrame.take" title="pyspark.sql.DataFrame.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">take</span></code></a>(num)</p></td>
<td><p>Returns the first <code class="docutils literal notranslate"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code> of <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.to.html#pyspark.sql.DataFrame.to" title="pyspark.sql.DataFrame.to"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to</span></code></a>(schema)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> where each row is reconciled to match the specified schema.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.toDF.html#pyspark.sql.DataFrame.toDF" title="pyspark.sql.DataFrame.toDF"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toDF</span></code></a>(*cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the specified new column names.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.toJSON.html#pyspark.sql.DataFrame.toJSON" title="pyspark.sql.DataFrame.toJSON"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toJSON</span></code></a>([use_unicode])</p></td>
<td><p>Converts a <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> into an <code class="xref py py-class docutils literal notranslate"><span class="pre">RDD</span></code> of JSON strings.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.toLocalIterator.html#pyspark.sql.DataFrame.toLocalIterator" title="pyspark.sql.DataFrame.toLocalIterator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toLocalIterator</span></code></a>([prefetchPartitions])</p></td>
<td><p>Returns an iterator that contains all of the rows in this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.toPandas.html#pyspark.sql.DataFrame.toPandas" title="pyspark.sql.DataFrame.toPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toPandas</span></code></a>()</p></td>
<td><p>Returns the contents of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a <code class="docutils literal notranslate"><span class="pre">pandas.DataFrame</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_koalas</span></code>([index_col])</p></td>
<td><p>Deprecated alias that converts this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> into a pandas-on-Spark DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.to_pandas_on_spark.html#pyspark.sql.DataFrame.to_pandas_on_spark" title="pyspark.sql.DataFrame.to_pandas_on_spark"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_pandas_on_spark</span></code></a>([index_col])</p></td>
<td><p>Converts the existing <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> into a pandas-on-Spark DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.transform.html#pyspark.sql.DataFrame.transform" title="pyspark.sql.DataFrame.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transform</span></code></a>(func, *args, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by applying the given user function; concise syntax for chaining custom transformations (see the example sketch after this table).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.union.html#pyspark.sql.DataFrame.union" title="pyspark.sql.DataFrame.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">union</span></code></a>(other)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing the union of rows in this and another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.unionAll.html#pyspark.sql.DataFrame.unionAll" title="pyspark.sql.DataFrame.unionAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unionAll</span></code></a>(other)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing the union of rows in this and another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> (alias of <code class="xref py py-func docutils literal notranslate"><span class="pre">union()</span></code>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.unionByName.html#pyspark.sql.DataFrame.unionByName" title="pyspark.sql.DataFrame.unionByName"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unionByName</span></code></a>(other[, allowMissingColumns])</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing the union of rows in this and another <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>, resolving columns by name rather than by position.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.unpersist.html#pyspark.sql.DataFrame.unpersist" title="pyspark.sql.DataFrame.unpersist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unpersist</span></code></a>([blocking])</p></td>
<td><p>Marks the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as non-persistent, and removes all blocks for it from memory and disk.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.unpivot.html#pyspark.sql.DataFrame.unpivot" title="pyspark.sql.DataFrame.unpivot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unpivot</span></code></a>(ids, values, variableColumnName, …)</p></td>
<td><p>Unpivots a DataFrame from wide format to long format, optionally leaving identifier columns in place.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.where.html#pyspark.sql.DataFrame.where" title="pyspark.sql.DataFrame.where"><code class="xref py py-obj docutils literal notranslate"><span class="pre">where</span></code></a>(condition)</p></td>
<td><p><a class="reference internal" href="pyspark.sql.DataFrame.where.html#pyspark.sql.DataFrame.where" title="pyspark.sql.DataFrame.where"><code class="xref py py-func docutils literal notranslate"><span class="pre">where()</span></code></a> is an alias for <a class="reference internal" href="pyspark.sql.DataFrame.filter.html#pyspark.sql.DataFrame.filter" title="pyspark.sql.DataFrame.filter"><code class="xref py py-func docutils literal notranslate"><span class="pre">filter()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withColumn.html#pyspark.sql.DataFrame.withColumn" title="pyspark.sql.DataFrame.withColumn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withColumn</span></code></a>(colName, col)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by adding a column or replacing the existing column that has the same name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withColumnRenamed.html#pyspark.sql.DataFrame.withColumnRenamed" title="pyspark.sql.DataFrame.withColumnRenamed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withColumnRenamed</span></code></a>(existing, new)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by renaming an existing column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withColumns.html#pyspark.sql.DataFrame.withColumns" title="pyspark.sql.DataFrame.withColumns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withColumns</span></code></a>(*colsMap)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by adding multiple columns or replacing the existing columns that have the same names.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withColumnsRenamed.html#pyspark.sql.DataFrame.withColumnsRenamed" title="pyspark.sql.DataFrame.withColumnsRenamed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withColumnsRenamed</span></code></a>(colsMap)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by renaming multiple columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withMetadata.html#pyspark.sql.DataFrame.withMetadata" title="pyspark.sql.DataFrame.withMetadata"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withMetadata</span></code></a>(columnName, metadata)</p></td>
<td><p>Returns a new <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by updating an existing column with metadata.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.withWatermark.html#pyspark.sql.DataFrame.withWatermark" title="pyspark.sql.DataFrame.withWatermark"><code class="xref py py-obj docutils literal notranslate"><span class="pre">withWatermark</span></code></a>(eventTime, delayThreshold)</p></td>
<td><p>Defines an event time watermark for this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.writeTo.html#pyspark.sql.DataFrame.writeTo" title="pyspark.sql.DataFrame.writeTo"><code class="xref py py-obj docutils literal notranslate"><span class="pre">writeTo</span></code></a>(table)</p></td>
<td><p>Creates a write configuration builder for v2 sources.</p></td>
</tr>
</tbody>
</table>
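<p class="rubric">Example</p>
<p>A minimal sketch exercising several of the methods summarized above. The session, sample rows, and column names (<code class="docutils literal notranslate"><span class="pre">age</span></code>, <code class="docutils literal notranslate"><span class="pre">name</span></code>) are illustrative only and not part of the API; <code class="docutils literal notranslate"><span class="pre">with_flag</span></code> is a hypothetical helper.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()

# Two small DataFrames with the same schema (data is illustrative).
df1 = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])
df2 = spark.createDataFrame([(9, "Carol")], ["age", "name"])

# sort: a new DataFrame ordered by the given column(s).
df1.sort(F.desc("age")).show()

# union resolves columns by position; unionByName resolves them by name.
df1.union(df2).show()
df1.unionByName(df2).show()

# withColumn adds (or replaces) one column; withColumnsRenamed renames several.
df3 = (
    df1.withColumn("age_plus_one", F.col("age") + 1)
       .withColumnsRenamed({"name": "first_name"})
)
df3.show()

# transform: concise syntax for chaining custom transformations.
def with_flag(df):
    # Hypothetical helper, shown only for illustration.
    return df.withColumn("flag", F.lit(True))

df1.transform(with_flag).show()

# take and tail return plain Python lists of Row objects.
first_rows = df1.take(1)
last_rows = df1.tail(1)
</pre></div></div>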
<p class="rubric">Attributes</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.columns.html#pyspark.sql.DataFrame.columns" title="pyspark.sql.DataFrame.columns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">columns</span></code></a></p></td>
<td><p>Returns all column names as a list.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.dtypes.html#pyspark.sql.DataFrame.dtypes" title="pyspark.sql.DataFrame.dtypes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dtypes</span></code></a></p></td>
<td><p>Returns all column names and their data types as a list.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.isStreaming.html#pyspark.sql.DataFrame.isStreaming" title="pyspark.sql.DataFrame.isStreaming"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isStreaming</span></code></a></p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> contains one or more sources that continuously return data as it arrives.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.na.html#pyspark.sql.DataFrame.na" title="pyspark.sql.DataFrame.na"><code class="xref py py-obj docutils literal notranslate"><span class="pre">na</span></code></a></p></td>
<td><p>Returns a <a class="reference internal" href="pyspark.sql.DataFrameNaFunctions.html#pyspark.sql.DataFrameNaFunctions" title="pyspark.sql.DataFrameNaFunctions"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrameNaFunctions</span></code></a> for handling missing values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.rdd.html#pyspark.sql.DataFrame.rdd" title="pyspark.sql.DataFrame.rdd"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rdd</span></code></a></p></td>
<td><p>Returns the content as a <a class="reference internal" href="../../api/pyspark.RDD.html#pyspark.RDD" title="pyspark.RDD"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.RDD</span></code></a> of <a class="reference internal" href="pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a> (illustrated in the sketch after this table).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.schema.html#pyspark.sql.DataFrame.schema" title="pyspark.sql.DataFrame.schema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">schema</span></code></a></p></td>
<td><p>Returns the schema of this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a <a class="reference internal" href="pyspark.sql.types.StructType.html#pyspark.sql.types.StructType" title="pyspark.sql.types.StructType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.StructType</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.sparkSession.html#pyspark.sql.DataFrame.sparkSession" title="pyspark.sql.DataFrame.sparkSession"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparkSession</span></code></a></p></td>
<td><p>Returns the Spark session that created this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">sql_ctx</span></code></p></td>
<td><p>Returns the <code class="docutils literal notranslate"><span class="pre">SQLContext</span></code> this <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> was created from (internal; prefer <code class="docutils literal notranslate"><span class="pre">sparkSession</span></code>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.stat.html#pyspark.sql.DataFrame.stat" title="pyspark.sql.DataFrame.stat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stat</span></code></a></p></td>
<td><p>Returns a <a class="reference internal" href="pyspark.sql.DataFrameStatFunctions.html#pyspark.sql.DataFrameStatFunctions" title="pyspark.sql.DataFrameStatFunctions"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrameStatFunctions</span></code></a> for statistical functions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.storageLevel.html#pyspark.sql.DataFrame.storageLevel" title="pyspark.sql.DataFrame.storageLevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">storageLevel</span></code></a></p></td>
<td><p>Gets the <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>’s current storage level.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.write.html#pyspark.sql.DataFrame.write" title="pyspark.sql.DataFrame.write"><code class="xref py py-obj docutils literal notranslate"><span class="pre">write</span></code></a></p></td>
<td><p>Interface for saving the content of the non-streaming <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> out into external storage.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="pyspark.sql.DataFrame.writeStream.html#pyspark.sql.DataFrame.writeStream" title="pyspark.sql.DataFrame.writeStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">writeStream</span></code></a></p></td>
<td><p>Interface for saving the content of the streaming <a class="reference internal" href="#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> out into external storage.</p></td>
</tr>
</tbody>
</table>
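<p class="rubric">Example</p>
<p>A similar sketch of the read-only attributes above, reusing the illustrative <code class="docutils literal notranslate"><span class="pre">df1</span></code> from the previous example; the values shown in comments are indicative, and the output path is hypothetical.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># Values in the comments are indicative, not guaranteed output.
df1.columns        # ['age', 'name']
df1.dtypes         # [('age', 'bigint'), ('name', 'string')]
df1.schema         # StructType with one StructField per column
df1.isStreaming    # False for a batch DataFrame
df1.storageLevel   # StorageLevel(False, False, False, False, 1) until persisted
df1.rdd            # the underlying pyspark.RDD of Row objects

# na and stat expose helper objects rather than data.
df1.na.fill({"age": 0})        # DataFrameNaFunctions entry point
df1.stat.corr("age", "age")    # DataFrameStatFunctions; self-correlation is 1.0

# write returns a DataFrameWriter; the output path below is hypothetical.
df1.write.mode("overwrite").parquet("/tmp/df1_example")
</pre></div></div>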
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.sql.Catalog.html" title="previous page">pyspark.sql.Catalog</a>
<a class='right-next' id="next-link" href="pyspark.sql.Column.html" title="next page">pyspark.sql.Column</a>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>