<!-- blob: f28ce5ee481aaedd32e9c44cf6b44e0671980ea4 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Spark SQL &#8212; PySpark 3.1.1 documentation</title>
<link rel="stylesheet" href="../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script src="../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/index.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="pyspark.sql.SparkSession" href="api/pyspark.sql.SparkSession.html" />
<link rel="prev" title="API Reference" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
<!-- Matomo -->
<script type="text/javascript">
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../index.html">
<img src="../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../user_guide/index.html">User Guide</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../migration_guide/index.html">Migration Guide</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="active">
<a href="">Spark SQL</a>
</li>
<li class="">
<a href="pyspark.ss.html">Structured Streaming</a>
</li>
<li class="">
<a href="pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="pyspark.streaming.html">Spark Streaming</a>
</li>
<li class="">
<a href="pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="pyspark.resource.html">Resource Management</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
<li class="nav-item toc-entry toc-h2">
<a href="#core-classes" class="nav-link">Core Classes</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#spark-session-apis" class="nav-link">Spark Session APIs</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#configuration" class="nav-link">Configuration</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#input-and-output" class="nav-link">Input and Output</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#dataframe-apis" class="nav-link">DataFrame APIs</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#column-apis" class="nav-link">Column APIs</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#data-types" class="nav-link">Data Types</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#row" class="nav-link">Row</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#functions" class="nav-link">Functions</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#window" class="nav-link">Window</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#grouping" class="nav-link">Grouping</a>
</li>
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="spark-sql">
<h1>Spark SQL<a class="headerlink" href="#spark-sql" title="Permalink to this headline"></a></h1>
<div class="section" id="core-classes">
<h2>Core Classes<a class="headerlink" href="#core-classes" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession" title="pyspark.sql.SparkSession"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession</span></code></a>(sparkContext[, jsparkSession])</p></td>
<td><p>The entry point to programming Spark with the Dataset and DataFrame API.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame</span></code></a>(jdf, sql_ctx)</p></td>
<td><p>A distributed collection of data grouped into named columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column</span></code></a>(jc)</p></td>
<td><p>A column in a DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Row</span></code></a></p></td>
<td><p>A row in <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.html#pyspark.sql.GroupedData" title="pyspark.sql.GroupedData"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData</span></code></a>(jgd, df)</p></td>
<td><p>A set of methods for aggregations on a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>, created by <a class="reference internal" href="api/pyspark.sql.DataFrame.groupBy.html#pyspark.sql.DataFrame.groupBy" title="pyspark.sql.DataFrame.groupBy"><code class="xref py py-func docutils literal notranslate"><span class="pre">DataFrame.groupBy()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.PandasCogroupedOps.html#pyspark.sql.PandasCogroupedOps" title="pyspark.sql.PandasCogroupedOps"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PandasCogroupedOps</span></code></a>(gd1, gd2)</p></td>
<td><p>A logical grouping of two <a class="reference internal" href="api/pyspark.sql.GroupedData.html#pyspark.sql.GroupedData" title="pyspark.sql.GroupedData"><code class="xref py py-class docutils literal notranslate"><span class="pre">GroupedData</span></code></a>, created by <a class="reference internal" href="api/pyspark.sql.GroupedData.cogroup.html#pyspark.sql.GroupedData.cogroup" title="pyspark.sql.GroupedData.cogroup"><code class="xref py py-func docutils literal notranslate"><span class="pre">GroupedData.cogroup()</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameNaFunctions.html#pyspark.sql.DataFrameNaFunctions" title="pyspark.sql.DataFrameNaFunctions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameNaFunctions</span></code></a>(df)</p></td>
<td><p>Functionality for working with missing data in <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.html#pyspark.sql.DataFrameStatFunctions" title="pyspark.sql.DataFrameStatFunctions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions</span></code></a>(df)</p></td>
<td><p>Functionality for statistic functions with <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Window.html#pyspark.sql.Window" title="pyspark.sql.Window"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window</span></code></a></p></td>
<td><p>Utility functions for defining window in DataFrames.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="spark-session-apis">
<h2>Spark Session APIs<a class="headerlink" href="#spark-session-apis" title="Permalink to this headline"></a></h2>
<p>The entry point to programming Spark with the Dataset and DataFrame API.
To create a Spark session, you should use <code class="docutils literal notranslate"><span class="pre">SparkSession.builder</span></code> attribute.
See also <a class="reference internal" href="api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession" title="pyspark.sql.SparkSession"><code class="xref py py-class docutils literal notranslate"><span class="pre">SparkSession</span></code></a>.</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.builder.appName.html#pyspark.sql.SparkSession.builder.appName" title="pyspark.sql.SparkSession.builder.appName"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.builder.appName</span></code></a>(name)</p></td>
<td><p>Sets a name for the application, which will be shown in the Spark web UI.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.builder.config.html#pyspark.sql.SparkSession.builder.config" title="pyspark.sql.SparkSession.builder.config"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.builder.config</span></code></a>([key, value, conf])</p></td>
<td><p>Sets a config option.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.builder.enableHiveSupport.html#pyspark.sql.SparkSession.builder.enableHiveSupport" title="pyspark.sql.SparkSession.builder.enableHiveSupport"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.builder.enableHiveSupport</span></code></a>()</p></td>
<td><p>Enables Hive support, including connectivity to a persistent Hive metastore, support for Hive SerDes, and Hive user-defined functions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.builder.getOrCreate.html#pyspark.sql.SparkSession.builder.getOrCreate" title="pyspark.sql.SparkSession.builder.getOrCreate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.builder.getOrCreate</span></code></a>()</p></td>
<td><p>Gets an existing <a class="reference internal" href="api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession" title="pyspark.sql.SparkSession"><code class="xref py py-class docutils literal notranslate"><span class="pre">SparkSession</span></code></a> or, if there is no existing one, creates a new one based on the options set in this builder.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.builder.master.html#pyspark.sql.SparkSession.builder.master" title="pyspark.sql.SparkSession.builder.master"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.builder.master</span></code></a>(master)</p></td>
<td><p>Sets the Spark master URL to connect to, such as “local” to run locally, “local[4]” to run locally with 4 cores, or “spark://master:7077” to run on a Spark standalone cluster.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.catalog.html#pyspark.sql.SparkSession.catalog" title="pyspark.sql.SparkSession.catalog"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.catalog</span></code></a></p></td>
<td><p>Interface through which the user may create, drop, alter or query underlying databases, tables, functions, etc.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.conf.html#pyspark.sql.SparkSession.conf" title="pyspark.sql.SparkSession.conf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.conf</span></code></a></p></td>
<td><p>Runtime configuration interface for Spark.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.createDataFrame.html#pyspark.sql.SparkSession.createDataFrame" title="pyspark.sql.SparkSession.createDataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.createDataFrame</span></code></a>(data[, schema, …])</p></td>
<td><p>Creates a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> from an <code class="xref py py-class docutils literal notranslate"><span class="pre">RDD</span></code>, a list or a <code class="xref py py-class docutils literal notranslate"><span class="pre">pandas.DataFrame</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.getActiveSession.html#pyspark.sql.SparkSession.getActiveSession" title="pyspark.sql.SparkSession.getActiveSession"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.getActiveSession</span></code></a>()</p></td>
<td><p>Returns the active SparkSession for the current thread, returned by the builder.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.newSession.html#pyspark.sql.SparkSession.newSession" title="pyspark.sql.SparkSession.newSession"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.newSession</span></code></a>()</p></td>
<td><p>Returns a new SparkSession as new session, that has separate SQLConf, registered temporary views and UDFs, but shared SparkContext and table cache.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.range.html#pyspark.sql.SparkSession.range" title="pyspark.sql.SparkSession.range"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.range</span></code></a>(start[, end, step, …])</p></td>
<td><p>Create a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with single <a class="reference internal" href="api/pyspark.sql.types.LongType.html#pyspark.sql.types.LongType" title="pyspark.sql.types.LongType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.LongType</span></code></a> column named <code class="docutils literal notranslate"><span class="pre">id</span></code>, containing elements in a range from <code class="docutils literal notranslate"><span class="pre">start</span></code> to <code class="docutils literal notranslate"><span class="pre">end</span></code> (exclusive) with step value <code class="docutils literal notranslate"><span class="pre">step</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.read.html#pyspark.sql.SparkSession.read" title="pyspark.sql.SparkSession.read"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.read</span></code></a></p></td>
<td><p>Returns a <code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrameReader</span></code> that can be used to read data in as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.readStream.html#pyspark.sql.SparkSession.readStream" title="pyspark.sql.SparkSession.readStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.readStream</span></code></a></p></td>
<td><p>Returns a <code class="xref py py-class docutils literal notranslate"><span class="pre">DataStreamReader</span></code> that can be used to read data streams as a streaming <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.sparkContext.html#pyspark.sql.SparkSession.sparkContext" title="pyspark.sql.SparkSession.sparkContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.sparkContext</span></code></a></p></td>
<td><p>Returns the underlying <code class="xref py py-class docutils literal notranslate"><span class="pre">SparkContext</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.sql.html#pyspark.sql.SparkSession.sql" title="pyspark.sql.SparkSession.sql"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.sql</span></code></a>(sqlQuery)</p></td>
<td><p>Returns a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> representing the result of the given query.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.stop.html#pyspark.sql.SparkSession.stop" title="pyspark.sql.SparkSession.stop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.stop</span></code></a>()</p></td>
<td><p>Stop the underlying <code class="xref py py-class docutils literal notranslate"><span class="pre">SparkContext</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.streams.html#pyspark.sql.SparkSession.streams" title="pyspark.sql.SparkSession.streams"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.streams</span></code></a></p></td>
<td><p>Returns a <code class="xref py py-class docutils literal notranslate"><span class="pre">StreamingQueryManager</span></code> that allows managing all the <code class="xref py py-class docutils literal notranslate"><span class="pre">StreamingQuery</span></code> instances active on <cite>this</cite> context.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.table.html#pyspark.sql.SparkSession.table" title="pyspark.sql.SparkSession.table"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.table</span></code></a>(tableName)</p></td>
<td><p>Returns the specified table as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.udf.html#pyspark.sql.SparkSession.udf" title="pyspark.sql.SparkSession.udf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.udf</span></code></a></p></td>
<td><p>Returns a <code class="xref py py-class docutils literal notranslate"><span class="pre">UDFRegistration</span></code> for UDF registration.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.SparkSession.version.html#pyspark.sql.SparkSession.version" title="pyspark.sql.SparkSession.version"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkSession.version</span></code></a></p></td>
<td><p>The version of Spark on which this application is running.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="configuration">
<h2>Configuration<a class="headerlink" href="#configuration" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.conf.RuntimeConfig.html#pyspark.sql.conf.RuntimeConfig" title="pyspark.sql.conf.RuntimeConfig"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RuntimeConfig</span></code></a>(jconf)</p></td>
<td><p>User-facing configuration API, accessible through <cite>SparkSession.conf</cite>.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="input-and-output">
<h2>Input and Output<a class="headerlink" href="#input-and-output" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.csv.html#pyspark.sql.DataFrameReader.csv" title="pyspark.sql.DataFrameReader.csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.csv</span></code></a>(path[, schema, sep, …])</p></td>
<td><p>Loads a CSV file and returns the result as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.format.html#pyspark.sql.DataFrameReader.format" title="pyspark.sql.DataFrameReader.format"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.format</span></code></a>(source)</p></td>
<td><p>Specifies the input data source format.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.jdbc.html#pyspark.sql.DataFrameReader.jdbc" title="pyspark.sql.DataFrameReader.jdbc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.jdbc</span></code></a>(url, table[, column, …])</p></td>
<td><p>Construct a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> representing the database table named <code class="docutils literal notranslate"><span class="pre">table</span></code> accessible via JDBC URL <code class="docutils literal notranslate"><span class="pre">url</span></code> and connection <code class="docutils literal notranslate"><span class="pre">properties</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.json.html#pyspark.sql.DataFrameReader.json" title="pyspark.sql.DataFrameReader.json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.json</span></code></a>(path[, schema, …])</p></td>
<td><p>Loads JSON files and returns the results as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.load.html#pyspark.sql.DataFrameReader.load" title="pyspark.sql.DataFrameReader.load"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.load</span></code></a>([path, format, schema])</p></td>
<td><p>Loads data from a data source and returns it as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.option.html#pyspark.sql.DataFrameReader.option" title="pyspark.sql.DataFrameReader.option"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.option</span></code></a>(key, value)</p></td>
<td><p>Adds an input option for the underlying data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.options.html#pyspark.sql.DataFrameReader.options" title="pyspark.sql.DataFrameReader.options"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.options</span></code></a>(**options)</p></td>
<td><p>Adds input options for the underlying data source.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.orc.html#pyspark.sql.DataFrameReader.orc" title="pyspark.sql.DataFrameReader.orc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.orc</span></code></a>(path[, mergeSchema, …])</p></td>
<td><p>Loads ORC files, returning the result as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.parquet.html#pyspark.sql.DataFrameReader.parquet" title="pyspark.sql.DataFrameReader.parquet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.parquet</span></code></a>(*paths, **options)</p></td>
<td><p>Loads Parquet files, returning the result as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.schema.html#pyspark.sql.DataFrameReader.schema" title="pyspark.sql.DataFrameReader.schema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.schema</span></code></a>(schema)</p></td>
<td><p>Specifies the input schema.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameReader.table.html#pyspark.sql.DataFrameReader.table" title="pyspark.sql.DataFrameReader.table"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameReader.table</span></code></a>(tableName)</p></td>
<td><p>Returns the specified table as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.bucketBy.html#pyspark.sql.DataFrameWriter.bucketBy" title="pyspark.sql.DataFrameWriter.bucketBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.bucketBy</span></code></a>(numBuckets, col, *cols)</p></td>
<td><p>Buckets the output by the given columns. If specified, the output is laid out on the file system similar to Hive’s bucketing scheme.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.csv.html#pyspark.sql.DataFrameWriter.csv" title="pyspark.sql.DataFrameWriter.csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.csv</span></code></a>(path[, mode, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> in CSV format at the specified path.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.format.html#pyspark.sql.DataFrameWriter.format" title="pyspark.sql.DataFrameWriter.format"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.format</span></code></a>(source)</p></td>
<td><p>Specifies the underlying output data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.insertInto.html#pyspark.sql.DataFrameWriter.insertInto" title="pyspark.sql.DataFrameWriter.insertInto"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.insertInto</span></code></a>(tableName[, …])</p></td>
<td><p>Inserts the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> to the specified table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.jdbc.html#pyspark.sql.DataFrameWriter.jdbc" title="pyspark.sql.DataFrameWriter.jdbc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.jdbc</span></code></a>(url, table[, mode, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> to an external database table via JDBC.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.json.html#pyspark.sql.DataFrameWriter.json" title="pyspark.sql.DataFrameWriter.json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.json</span></code></a>(path[, mode, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> in JSON format (<a class="reference external" href="http://jsonlines.org/">JSON Lines text format or newline-delimited JSON</a>) at the specified path.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.mode.html#pyspark.sql.DataFrameWriter.mode" title="pyspark.sql.DataFrameWriter.mode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.mode</span></code></a>(saveMode)</p></td>
<td><p>Specifies the behavior when data or table already exists.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.option.html#pyspark.sql.DataFrameWriter.option" title="pyspark.sql.DataFrameWriter.option"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.option</span></code></a>(key, value)</p></td>
<td><p>Adds an output option for the underlying data source.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.options.html#pyspark.sql.DataFrameWriter.options" title="pyspark.sql.DataFrameWriter.options"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.options</span></code></a>(**options)</p></td>
<td><p>Adds output options for the underlying data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.orc.html#pyspark.sql.DataFrameWriter.orc" title="pyspark.sql.DataFrameWriter.orc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.orc</span></code></a>(path[, mode, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> in ORC format at the specified path.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.parquet.html#pyspark.sql.DataFrameWriter.parquet" title="pyspark.sql.DataFrameWriter.parquet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.parquet</span></code></a>(path[, mode, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> in Parquet format at the specified path.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.partitionBy.html#pyspark.sql.DataFrameWriter.partitionBy" title="pyspark.sql.DataFrameWriter.partitionBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.partitionBy</span></code></a>(*cols)</p></td>
<td><p>Partitions the output by the given columns on the file system.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.save.html#pyspark.sql.DataFrameWriter.save" title="pyspark.sql.DataFrameWriter.save"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.save</span></code></a>([path, format, mode, …])</p></td>
<td><p>Saves the contents of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> to a data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.saveAsTable.html#pyspark.sql.DataFrameWriter.saveAsTable" title="pyspark.sql.DataFrameWriter.saveAsTable"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.saveAsTable</span></code></a>(name[, format, …])</p></td>
<td><p>Saves the content of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as the specified table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.sortBy.html#pyspark.sql.DataFrameWriter.sortBy" title="pyspark.sql.DataFrameWriter.sortBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.sortBy</span></code></a>(col, *cols)</p></td>
<td><p>Sorts the output in each bucket by the given columns on the file system.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameWriter.text.html#pyspark.sql.DataFrameWriter.text" title="pyspark.sql.DataFrameWriter.text"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriter.text</span></code></a>(path[, compression, …])</p></td>
<td><p>Saves the content of the DataFrame in a text file at the specified path.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="dataframe-apis">
<h2>DataFrame APIs<a class="headerlink" href="#dataframe-apis" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.agg.html#pyspark.sql.DataFrame.agg" title="pyspark.sql.DataFrame.agg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.agg</span></code></a>(*exprs)</p></td>
<td><p>Aggregate on the entire <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> without groups (shorthand for <code class="docutils literal notranslate"><span class="pre">df.groupBy().agg()</span></code>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.alias.html#pyspark.sql.DataFrame.alias" title="pyspark.sql.DataFrame.alias"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.alias</span></code></a>(alias)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with an alias set.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.approxQuantile.html#pyspark.sql.DataFrame.approxQuantile" title="pyspark.sql.DataFrame.approxQuantile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.approxQuantile</span></code></a>(col, probabilities, …)</p></td>
<td><p>Calculates the approximate quantiles of numerical columns of a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.cache.html#pyspark.sql.DataFrame.cache" title="pyspark.sql.DataFrame.cache"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cache</span></code></a>()</p></td>
<td><p>Persists the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the default storage level (<cite>MEMORY_AND_DISK</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.checkpoint.html#pyspark.sql.DataFrame.checkpoint" title="pyspark.sql.DataFrame.checkpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.checkpoint</span></code></a>([eager])</p></td>
<td><p>Returns a checkpointed version of this DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.coalesce.html#pyspark.sql.DataFrame.coalesce" title="pyspark.sql.DataFrame.coalesce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.coalesce</span></code></a>(numPartitions)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> that has exactly <cite>numPartitions</cite> partitions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.colRegex.html#pyspark.sql.DataFrame.colRegex" title="pyspark.sql.DataFrame.colRegex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.colRegex</span></code></a>(colName)</p></td>
<td><p>Selects column based on the column name specified as a regex and returns it as <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.collect.html#pyspark.sql.DataFrame.collect" title="pyspark.sql.DataFrame.collect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.collect</span></code></a>()</p></td>
<td><p>Returns all the records as a list of <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.columns.html#pyspark.sql.DataFrame.columns" title="pyspark.sql.DataFrame.columns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.columns</span></code></a></p></td>
<td><p>Returns all column names as a list.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.corr.html#pyspark.sql.DataFrame.corr" title="pyspark.sql.DataFrame.corr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.corr</span></code></a>(col1, col2[, method])</p></td>
<td><p>Calculates the correlation of two columns of a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a double value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.count.html#pyspark.sql.DataFrame.count" title="pyspark.sql.DataFrame.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.count</span></code></a>()</p></td>
<td><p>Returns the number of rows in this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.cov.html#pyspark.sql.DataFrame.cov" title="pyspark.sql.DataFrame.cov"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cov</span></code></a>(col1, col2)</p></td>
<td><p>Calculate the sample covariance for the given columns, specified by their names, as a double value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.createGlobalTempView.html#pyspark.sql.DataFrame.createGlobalTempView" title="pyspark.sql.DataFrame.createGlobalTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.createGlobalTempView</span></code></a>(name)</p></td>
<td><p>Creates a global temporary view with this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.createOrReplaceGlobalTempView.html#pyspark.sql.DataFrame.createOrReplaceGlobalTempView" title="pyspark.sql.DataFrame.createOrReplaceGlobalTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.createOrReplaceGlobalTempView</span></code></a>(name)</p></td>
<td><p>Creates or replaces a global temporary view using the given name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.createOrReplaceTempView.html#pyspark.sql.DataFrame.createOrReplaceTempView" title="pyspark.sql.DataFrame.createOrReplaceTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.createOrReplaceTempView</span></code></a>(name)</p></td>
<td><p>Creates or replaces a local temporary view with this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.createTempView.html#pyspark.sql.DataFrame.createTempView" title="pyspark.sql.DataFrame.createTempView"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.createTempView</span></code></a>(name)</p></td>
<td><p>Creates a local temporary view with this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.crossJoin.html#pyspark.sql.DataFrame.crossJoin" title="pyspark.sql.DataFrame.crossJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.crossJoin</span></code></a>(other)</p></td>
<td><p>Returns the cartesian product with another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.crosstab.html#pyspark.sql.DataFrame.crosstab" title="pyspark.sql.DataFrame.crosstab"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.crosstab</span></code></a>(col1, col2)</p></td>
<td><p>Computes a pair-wise frequency table of the given columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.cube.html#pyspark.sql.DataFrame.cube" title="pyspark.sql.DataFrame.cube"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cube</span></code></a>(*cols)</p></td>
<td><p>Create a multi-dimensional cube for the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregations on them.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.describe.html#pyspark.sql.DataFrame.describe" title="pyspark.sql.DataFrame.describe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.describe</span></code></a>(*cols)</p></td>
<td><p>Computes basic statistics for numeric and string columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.distinct.html#pyspark.sql.DataFrame.distinct" title="pyspark.sql.DataFrame.distinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.distinct</span></code></a>()</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing the distinct rows in this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.drop.html#pyspark.sql.DataFrame.drop" title="pyspark.sql.DataFrame.drop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.drop</span></code></a>(*cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> that drops the specified column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.dropDuplicates.html#pyspark.sql.DataFrame.dropDuplicates" title="pyspark.sql.DataFrame.dropDuplicates"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dropDuplicates</span></code></a>([subset])</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with duplicate rows removed, optionally only considering certain columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.drop_duplicates.html#pyspark.sql.DataFrame.drop_duplicates" title="pyspark.sql.DataFrame.drop_duplicates"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.drop_duplicates</span></code></a>([subset])</p></td>
<td><p><code class="xref py py-func docutils literal notranslate"><span class="pre">drop_duplicates()</span></code> is an alias for <code class="xref py py-func docutils literal notranslate"><span class="pre">dropDuplicates()</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.dropna.html#pyspark.sql.DataFrame.dropna" title="pyspark.sql.DataFrame.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dropna</span></code></a>([how, thresh, subset])</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> omitting rows with null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.dtypes.html#pyspark.sql.DataFrame.dtypes" title="pyspark.sql.DataFrame.dtypes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dtypes</span></code></a></p></td>
<td><p>Returns all column names and their data types as a list.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.exceptAll.html#pyspark.sql.DataFrame.exceptAll" title="pyspark.sql.DataFrame.exceptAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.exceptAll</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> but not in another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> while preserving duplicates.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.explain.html#pyspark.sql.DataFrame.explain" title="pyspark.sql.DataFrame.explain"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.explain</span></code></a>([extended, mode])</p></td>
<td><p>Prints the (logical and physical) plans to the console for debugging purposes.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.fillna.html#pyspark.sql.DataFrame.fillna" title="pyspark.sql.DataFrame.fillna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.fillna</span></code></a>(value[, subset])</p></td>
<td><p>Replace null values, alias for <code class="docutils literal notranslate"><span class="pre">na.fill()</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.filter.html#pyspark.sql.DataFrame.filter" title="pyspark.sql.DataFrame.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.filter</span></code></a>(condition)</p></td>
<td><p>Filters rows using the given condition.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.first.html#pyspark.sql.DataFrame.first" title="pyspark.sql.DataFrame.first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.first</span></code></a>()</p></td>
<td><p>Returns the first row as a <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.foreach.html#pyspark.sql.DataFrame.foreach" title="pyspark.sql.DataFrame.foreach"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.foreach</span></code></a>(f)</p></td>
<td><p>Applies the <code class="docutils literal notranslate"><span class="pre">f</span></code> function to all <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a> of this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.foreachPartition.html#pyspark.sql.DataFrame.foreachPartition" title="pyspark.sql.DataFrame.foreachPartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.foreachPartition</span></code></a>(f)</p></td>
<td><p>Applies the <code class="docutils literal notranslate"><span class="pre">f</span></code> function to each partition of this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.freqItems.html#pyspark.sql.DataFrame.freqItems" title="pyspark.sql.DataFrame.freqItems"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.freqItems</span></code></a>(cols[, support])</p></td>
<td><p>Finds frequent items for columns, possibly with false positives.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.groupBy.html#pyspark.sql.DataFrame.groupBy" title="pyspark.sql.DataFrame.groupBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.groupBy</span></code></a>(*cols)</p></td>
<td><p>Groups the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregation on them.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head" title="pyspark.sql.DataFrame.head"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.head</span></code></a>([n])</p></td>
<td><p>Returns the first <code class="docutils literal notranslate"><span class="pre">n</span></code> rows.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.hint.html#pyspark.sql.DataFrame.hint" title="pyspark.sql.DataFrame.hint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.hint</span></code></a>(name, *parameters)</p></td>
<td><p>Specifies some hint on the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.inputFiles.html#pyspark.sql.DataFrame.inputFiles" title="pyspark.sql.DataFrame.inputFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.inputFiles</span></code></a>()</p></td>
<td><p>Returns a best-effort snapshot of the files that compose this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.intersect.html#pyspark.sql.DataFrame.intersect" title="pyspark.sql.DataFrame.intersect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.intersect</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows only in both this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.intersectAll.html#pyspark.sql.DataFrame.intersectAll" title="pyspark.sql.DataFrame.intersectAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.intersectAll</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in both this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> while preserving duplicates.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.isLocal.html#pyspark.sql.DataFrame.isLocal" title="pyspark.sql.DataFrame.isLocal"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.isLocal</span></code></a>()</p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if the <code class="xref py py-func docutils literal notranslate"><span class="pre">collect()</span></code> and <code class="xref py py-func docutils literal notranslate"><span class="pre">take()</span></code> methods can be run locally (without any Spark executors).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.isStreaming.html#pyspark.sql.DataFrame.isStreaming" title="pyspark.sql.DataFrame.isStreaming"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.isStreaming</span></code></a></p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if this <code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code> contains one or more sources that continuously return data as it arrives.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.join.html#pyspark.sql.DataFrame.join" title="pyspark.sql.DataFrame.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.join</span></code></a>(other[, on, how])</p></td>
<td><p>Joins with another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>, using the given join expression.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.limit.html#pyspark.sql.DataFrame.limit" title="pyspark.sql.DataFrame.limit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.limit</span></code></a>(num)</p></td>
<td><p>Limits the result count to the number specified.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.localCheckpoint.html#pyspark.sql.DataFrame.localCheckpoint" title="pyspark.sql.DataFrame.localCheckpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.localCheckpoint</span></code></a>([eager])</p></td>
<td><p>Returns a locally checkpointed version of this Dataset.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.mapInPandas.html#pyspark.sql.DataFrame.mapInPandas" title="pyspark.sql.DataFrame.mapInPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mapInPandas</span></code></a>(func, schema)</p></td>
<td><p>Maps an iterator of batches in the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using a Python native function that takes and outputs a pandas DataFrame, and returns the result as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.na.html#pyspark.sql.DataFrame.na" title="pyspark.sql.DataFrame.na"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.na</span></code></a></p></td>
<td><p>Returns a <a class="reference internal" href="api/pyspark.sql.DataFrameNaFunctions.html#pyspark.sql.DataFrameNaFunctions" title="pyspark.sql.DataFrameNaFunctions"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrameNaFunctions</span></code></a> for handling missing values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.orderBy.html#pyspark.sql.DataFrame.orderBy" title="pyspark.sql.DataFrame.orderBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.orderBy</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> sorted by the specified column(s).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.persist.html#pyspark.sql.DataFrame.persist" title="pyspark.sql.DataFrame.persist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.persist</span></code></a>([storageLevel])</p></td>
<td><p>Sets the storage level to persist the contents of the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> across operations after the first time it is computed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.printSchema.html#pyspark.sql.DataFrame.printSchema" title="pyspark.sql.DataFrame.printSchema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.printSchema</span></code></a>()</p></td>
<td><p>Prints out the schema in the tree format.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.randomSplit.html#pyspark.sql.DataFrame.randomSplit" title="pyspark.sql.DataFrame.randomSplit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.randomSplit</span></code></a>(weights[, seed])</p></td>
<td><p>Randomly splits this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the provided weights.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.rdd.html#pyspark.sql.DataFrame.rdd" title="pyspark.sql.DataFrame.rdd"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rdd</span></code></a></p></td>
<td><p>Returns the content as a <a class="reference internal" href="api/pyspark.RDD.html#pyspark.RDD" title="pyspark.RDD"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.RDD</span></code></a> of <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.registerTempTable.html#pyspark.sql.DataFrame.registerTempTable" title="pyspark.sql.DataFrame.registerTempTable"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.registerTempTable</span></code></a>(name)</p></td>
<td><p>Registers this DataFrame as a temporary table using the given name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.repartition.html#pyspark.sql.DataFrame.repartition" title="pyspark.sql.DataFrame.repartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.repartition</span></code></a>(numPartitions, *cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> partitioned by the given partitioning expressions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.repartitionByRange.html#pyspark.sql.DataFrame.repartitionByRange" title="pyspark.sql.DataFrame.repartitionByRange"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.repartitionByRange</span></code></a>(numPartitions, …)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> partitioned by the given partitioning expressions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.replace.html#pyspark.sql.DataFrame.replace" title="pyspark.sql.DataFrame.replace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.replace</span></code></a>(to_replace[, value, subset])</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> replacing a value with another value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.rollup.html#pyspark.sql.DataFrame.rollup" title="pyspark.sql.DataFrame.rollup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rollup</span></code></a>(*cols)</p></td>
<td><p>Create a multi-dimensional rollup for the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified columns, so we can run aggregation on them.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.sameSemantics.html#pyspark.sql.DataFrame.sameSemantics" title="pyspark.sql.DataFrame.sameSemantics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sameSemantics</span></code></a>(other)</p></td>
<td><p>Returns <code class="docutils literal notranslate"><span class="pre">True</span></code> when the logical query plans inside both <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>s are equal and therefore return the same results.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.sample.html#pyspark.sql.DataFrame.sample" title="pyspark.sql.DataFrame.sample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sample</span></code></a>([withReplacement, …])</p></td>
<td><p>Returns a sampled subset of this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.sampleBy.html#pyspark.sql.DataFrame.sampleBy" title="pyspark.sql.DataFrame.sampleBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sampleBy</span></code></a>(col, fractions[, seed])</p></td>
<td><p>Returns a stratified sample without replacement based on the fraction given on each stratum.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.schema.html#pyspark.sql.DataFrame.schema" title="pyspark.sql.DataFrame.schema"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.schema</span></code></a></p></td>
<td><p>Returns the schema of this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a <a class="reference internal" href="api/pyspark.sql.types.StructType.html#pyspark.sql.types.StructType" title="pyspark.sql.types.StructType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.StructType</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.select.html#pyspark.sql.DataFrame.select" title="pyspark.sql.DataFrame.select"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.select</span></code></a>(*cols)</p></td>
<td><p>Projects a set of expressions and returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.selectExpr.html#pyspark.sql.DataFrame.selectExpr" title="pyspark.sql.DataFrame.selectExpr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.selectExpr</span></code></a>(*expr)</p></td>
<td><p>Projects a set of SQL expressions and returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.semanticHash.html#pyspark.sql.DataFrame.semanticHash" title="pyspark.sql.DataFrame.semanticHash"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.semanticHash</span></code></a>()</p></td>
<td><p>Returns a hash code of the logical query plan against this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.show.html#pyspark.sql.DataFrame.show" title="pyspark.sql.DataFrame.show"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.show</span></code></a>([n, truncate, vertical])</p></td>
<td><p>Prints the first <code class="docutils literal notranslate"><span class="pre">n</span></code> rows to the console.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.sort.html#pyspark.sql.DataFrame.sort" title="pyspark.sql.DataFrame.sort"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sort</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> sorted by the specified column(s).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.sortWithinPartitions.html#pyspark.sql.DataFrame.sortWithinPartitions" title="pyspark.sql.DataFrame.sortWithinPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sortWithinPartitions</span></code></a>(*cols, **kwargs)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with each partition sorted by the specified column(s).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.stat.html#pyspark.sql.DataFrame.stat" title="pyspark.sql.DataFrame.stat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.stat</span></code></a></p></td>
<td><p>Returns a <a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.html#pyspark.sql.DataFrameStatFunctions" title="pyspark.sql.DataFrameStatFunctions"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrameStatFunctions</span></code></a> for statistic functions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.storageLevel.html#pyspark.sql.DataFrame.storageLevel" title="pyspark.sql.DataFrame.storageLevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.storageLevel</span></code></a></p></td>
<td><p>Get the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>’s current storage level.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.subtract.html#pyspark.sql.DataFrame.subtract" title="pyspark.sql.DataFrame.subtract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.subtract</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing rows in this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> but not in another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.summary.html#pyspark.sql.DataFrame.summary" title="pyspark.sql.DataFrame.summary"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.summary</span></code></a>(*statistics)</p></td>
<td><p>Computes specified statistics for numeric and string columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.tail.html#pyspark.sql.DataFrame.tail" title="pyspark.sql.DataFrame.tail"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.tail</span></code></a>(num)</p></td>
<td><p>Returns the last <code class="docutils literal notranslate"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code> of <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.take.html#pyspark.sql.DataFrame.take" title="pyspark.sql.DataFrame.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.take</span></code></a>(num)</p></td>
<td><p>Returns the first <code class="docutils literal notranslate"><span class="pre">num</span></code> rows as a <code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code> of <a class="reference internal" href="api/pyspark.sql.Row.html#pyspark.sql.Row" title="pyspark.sql.Row"><code class="xref py py-class docutils literal notranslate"><span class="pre">Row</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.toDF.html#pyspark.sql.DataFrame.toDF" title="pyspark.sql.DataFrame.toDF"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.toDF</span></code></a>(*cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with new specified column names.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.toJSON.html#pyspark.sql.DataFrame.toJSON" title="pyspark.sql.DataFrame.toJSON"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.toJSON</span></code></a>([use_unicode])</p></td>
<td><p>Converts a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> into a <code class="xref py py-class docutils literal notranslate"><span class="pre">RDD</span></code> of string.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.toLocalIterator.html#pyspark.sql.DataFrame.toLocalIterator" title="pyspark.sql.DataFrame.toLocalIterator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.toLocalIterator</span></code></a>([prefetchPartitions])</p></td>
<td><p>Returns an iterator that contains all of the rows in this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.toPandas.html#pyspark.sql.DataFrame.toPandas" title="pyspark.sql.DataFrame.toPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.toPandas</span></code></a>()</p></td>
<td><p>Returns the contents of this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as Pandas <code class="docutils literal notranslate"><span class="pre">pandas.DataFrame</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.transform.html#pyspark.sql.DataFrame.transform" title="pyspark.sql.DataFrame.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.transform</span></code></a>(func)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.union.html#pyspark.sql.DataFrame.union" title="pyspark.sql.DataFrame.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.union</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing union of rows in this and another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.unionAll.html#pyspark.sql.DataFrame.unionAll" title="pyspark.sql.DataFrame.unionAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.unionAll</span></code></a>(other)</p></td>
<td><p>Return a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing union of rows in this and another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.unionByName.html#pyspark.sql.DataFrame.unionByName" title="pyspark.sql.DataFrame.unionByName"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.unionByName</span></code></a>(other[, …])</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> containing union of rows in this and another <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.unpersist.html#pyspark.sql.DataFrame.unpersist" title="pyspark.sql.DataFrame.unpersist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.unpersist</span></code></a>([blocking])</p></td>
<td><p>Marks the <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as non-persistent, and removes all blocks for it from memory and disk.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.where.html#pyspark.sql.DataFrame.where" title="pyspark.sql.DataFrame.where"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.where</span></code></a>(condition)</p></td>
<td><p><code class="xref py py-func docutils literal notranslate"><span class="pre">where()</span></code> is an alias for <code class="xref py py-func docutils literal notranslate"><span class="pre">filter()</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.withColumn.html#pyspark.sql.DataFrame.withColumn" title="pyspark.sql.DataFrame.withColumn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.withColumn</span></code></a>(colName, col)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by adding a column or replacing the existing column that has the same name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.withColumnRenamed.html#pyspark.sql.DataFrame.withColumnRenamed" title="pyspark.sql.DataFrame.withColumnRenamed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.withColumnRenamed</span></code></a>(existing, new)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> by renaming an existing column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.withWatermark.html#pyspark.sql.DataFrame.withWatermark" title="pyspark.sql.DataFrame.withWatermark"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.withWatermark</span></code></a>(eventTime, …)</p></td>
<td><p>Defines an event time watermark for this <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.write.html#pyspark.sql.DataFrame.write" title="pyspark.sql.DataFrame.write"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.write</span></code></a></p></td>
<td><p>Interface for saving the content of the non-streaming <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> out into external storage.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.writeStream.html#pyspark.sql.DataFrame.writeStream" title="pyspark.sql.DataFrame.writeStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.writeStream</span></code></a></p></td>
<td><p>Interface for saving the content of the streaming <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> out into external storage.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrame.writeTo.html#pyspark.sql.DataFrame.writeTo" title="pyspark.sql.DataFrame.writeTo"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.writeTo</span></code></a>(table)</p></td>
<td><p>Create a write configuration builder for v2 sources.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameNaFunctions.drop.html#pyspark.sql.DataFrameNaFunctions.drop" title="pyspark.sql.DataFrameNaFunctions.drop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameNaFunctions.drop</span></code></a>([how, thresh, subset])</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> omitting rows with null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameNaFunctions.fill.html#pyspark.sql.DataFrameNaFunctions.fill" title="pyspark.sql.DataFrameNaFunctions.fill"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameNaFunctions.fill</span></code></a>(value[, subset])</p></td>
<td><p>Replace null values, alias for <code class="docutils literal notranslate"><span class="pre">na.fill()</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameNaFunctions.replace.html#pyspark.sql.DataFrameNaFunctions.replace" title="pyspark.sql.DataFrameNaFunctions.replace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameNaFunctions.replace</span></code></a>(to_replace[, …])</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> replacing a value with another value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.approxQuantile.html#pyspark.sql.DataFrameStatFunctions.approxQuantile" title="pyspark.sql.DataFrameStatFunctions.approxQuantile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.approxQuantile</span></code></a>(col, …)</p></td>
<td><p>Calculates the approximate quantiles of numerical columns of a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.corr.html#pyspark.sql.DataFrameStatFunctions.corr" title="pyspark.sql.DataFrameStatFunctions.corr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.corr</span></code></a>(col1, col2[, method])</p></td>
<td><p>Calculates the correlation of two columns of a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> as a double value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.cov.html#pyspark.sql.DataFrameStatFunctions.cov" title="pyspark.sql.DataFrameStatFunctions.cov"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.cov</span></code></a>(col1, col2)</p></td>
<td><p>Calculate the sample covariance for the given columns, specified by their names, as a double value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.crosstab.html#pyspark.sql.DataFrameStatFunctions.crosstab" title="pyspark.sql.DataFrameStatFunctions.crosstab"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.crosstab</span></code></a>(col1, col2)</p></td>
<td><p>Computes a pair-wise frequency table of the given columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.freqItems.html#pyspark.sql.DataFrameStatFunctions.freqItems" title="pyspark.sql.DataFrameStatFunctions.freqItems"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.freqItems</span></code></a>(cols[, support])</p></td>
<td><p>Finding frequent items for columns, possibly with false positives.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.DataFrameStatFunctions.sampleBy.html#pyspark.sql.DataFrameStatFunctions.sampleBy" title="pyspark.sql.DataFrameStatFunctions.sampleBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameStatFunctions.sampleBy</span></code></a>(col, fractions)</p></td>
<td><p>Returns a stratified sample without replacement based on the fraction given on each stratum.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="column-apis">
<h2>Column APIs<a class="headerlink" href="#column-apis" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.alias.html#pyspark.sql.Column.alias" title="pyspark.sql.Column.alias"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.alias</span></code></a>(*alias, **kwargs)</p></td>
<td><p>Returns this column aliased with a new name or names (in the case of expressions that return more than one column, such as explode).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.asc.html#pyspark.sql.Column.asc" title="pyspark.sql.Column.asc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.asc</span></code></a>()</p></td>
<td><p>Returns a sort expression based on ascending order of the column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.asc_nulls_first.html#pyspark.sql.Column.asc_nulls_first" title="pyspark.sql.Column.asc_nulls_first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.asc_nulls_first</span></code></a>()</p></td>
<td><p>Returns a sort expression based on ascending order of the column, and null values appear before non-null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.asc_nulls_last.html#pyspark.sql.Column.asc_nulls_last" title="pyspark.sql.Column.asc_nulls_last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.asc_nulls_last</span></code></a>()</p></td>
<td><p>Returns a sort expression based on ascending order of the column, and null values appear after non-null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.astype.html#pyspark.sql.Column.astype" title="pyspark.sql.Column.astype"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.astype</span></code></a>(dataType)</p></td>
<td><p><code class="xref py py-func docutils literal notranslate"><span class="pre">astype()</span></code> is an alias for <code class="xref py py-func docutils literal notranslate"><span class="pre">cast()</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.between.html#pyspark.sql.Column.between" title="pyspark.sql.Column.between"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.between</span></code></a>(lowerBound, upperBound)</p></td>
<td><p>A boolean expression that is evaluated to true if the value of this expression is between the given columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.bitwiseAND.html#pyspark.sql.Column.bitwiseAND" title="pyspark.sql.Column.bitwiseAND"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.bitwiseAND</span></code></a>(other)</p></td>
<td><p>Compute bitwise AND of this expression with another expression.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.bitwiseOR.html#pyspark.sql.Column.bitwiseOR" title="pyspark.sql.Column.bitwiseOR"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.bitwiseOR</span></code></a>(other)</p></td>
<td><p>Compute bitwise OR of this expression with another expression.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.bitwiseXOR.html#pyspark.sql.Column.bitwiseXOR" title="pyspark.sql.Column.bitwiseXOR"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.bitwiseXOR</span></code></a>(other)</p></td>
<td><p>Compute bitwise XOR of this expression with another expression.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.cast.html#pyspark.sql.Column.cast" title="pyspark.sql.Column.cast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.cast</span></code></a>(dataType)</p></td>
<td><p>Convert the column into type <code class="docutils literal notranslate"><span class="pre">dataType</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.contains.html#pyspark.sql.Column.contains" title="pyspark.sql.Column.contains"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.contains</span></code></a>(other)</p></td>
<td><p>Contains the other element.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.desc.html#pyspark.sql.Column.desc" title="pyspark.sql.Column.desc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.desc</span></code></a>()</p></td>
<td><p>Returns a sort expression based on the descending order of the column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.desc_nulls_first.html#pyspark.sql.Column.desc_nulls_first" title="pyspark.sql.Column.desc_nulls_first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.desc_nulls_first</span></code></a>()</p></td>
<td><p>Returns a sort expression based on the descending order of the column, and null values appear before non-null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.desc_nulls_last.html#pyspark.sql.Column.desc_nulls_last" title="pyspark.sql.Column.desc_nulls_last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.desc_nulls_last</span></code></a>()</p></td>
<td><p>Returns a sort expression based on the descending order of the column, and null values appear after non-null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.dropFields.html#pyspark.sql.Column.dropFields" title="pyspark.sql.Column.dropFields"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.dropFields</span></code></a>(*fieldNames)</p></td>
<td><p>An expression that drops fields in <code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code> by name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.endswith.html#pyspark.sql.Column.endswith" title="pyspark.sql.Column.endswith"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.endswith</span></code></a>(other)</p></td>
<td><p>String ends with.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.eqNullSafe.html#pyspark.sql.Column.eqNullSafe" title="pyspark.sql.Column.eqNullSafe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.eqNullSafe</span></code></a>(other)</p></td>
<td><p>Equality test that is safe for null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.getField.html#pyspark.sql.Column.getField" title="pyspark.sql.Column.getField"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.getField</span></code></a>(name)</p></td>
<td><p>An expression that gets a field by name in a StructType.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.getItem.html#pyspark.sql.Column.getItem" title="pyspark.sql.Column.getItem"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.getItem</span></code></a>(key)</p></td>
<td><p>An expression that gets an item at position <code class="docutils literal notranslate"><span class="pre">ordinal</span></code> out of a list, or gets an item by key out of a dict.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.isNotNull.html#pyspark.sql.Column.isNotNull" title="pyspark.sql.Column.isNotNull"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.isNotNull</span></code></a>()</p></td>
<td><p>True if the current expression is NOT null.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.isNull.html#pyspark.sql.Column.isNull" title="pyspark.sql.Column.isNull"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.isNull</span></code></a>()</p></td>
<td><p>True if the current expression is null.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.isin.html#pyspark.sql.Column.isin" title="pyspark.sql.Column.isin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.isin</span></code></a>(*cols)</p></td>
<td><p>A boolean expression that is evaluated to true if the value of this expression is contained by the evaluated values of the arguments.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.like.html#pyspark.sql.Column.like" title="pyspark.sql.Column.like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.like</span></code></a>(other)</p></td>
<td><p>SQL like expression.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.name.html#pyspark.sql.Column.name" title="pyspark.sql.Column.name"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.name</span></code></a>(*alias, **kwargs)</p></td>
<td><p><code class="xref py py-func docutils literal notranslate"><span class="pre">name()</span></code> is an alias for <code class="xref py py-func docutils literal notranslate"><span class="pre">alias()</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.otherwise.html#pyspark.sql.Column.otherwise" title="pyspark.sql.Column.otherwise"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.otherwise</span></code></a>(value)</p></td>
<td><p>Evaluates a list of conditions and returns one of multiple possible result expressions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.over.html#pyspark.sql.Column.over" title="pyspark.sql.Column.over"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.over</span></code></a>(window)</p></td>
<td><p>Define a windowing column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.rlike.html#pyspark.sql.Column.rlike" title="pyspark.sql.Column.rlike"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.rlike</span></code></a>(other)</p></td>
<td><p>SQL RLIKE expression (LIKE with Regex).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.startswith.html#pyspark.sql.Column.startswith" title="pyspark.sql.Column.startswith"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.startswith</span></code></a>(other)</p></td>
<td><p>String starts with.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.substr.html#pyspark.sql.Column.substr" title="pyspark.sql.Column.substr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.substr</span></code></a>(startPos, length)</p></td>
<td><p>Return a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> which is a substring of the column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Column.when.html#pyspark.sql.Column.when" title="pyspark.sql.Column.when"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.when</span></code></a>(condition, value)</p></td>
<td><p>Evaluates a list of conditions and returns one of multiple possible result expressions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Column.withField.html#pyspark.sql.Column.withField" title="pyspark.sql.Column.withField"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Column.withField</span></code></a>(fieldName, col)</p></td>
<td><p>An expression that adds/replaces a field in <code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code> by name.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="data-types">
<h2>Data Types<a class="headerlink" href="#data-types" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.ArrayType.html#pyspark.sql.types.ArrayType" title="pyspark.sql.types.ArrayType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ArrayType</span></code></a>(elementType[, containsNull])</p></td>
<td><p>Array data type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.BinaryType.html#pyspark.sql.types.BinaryType" title="pyspark.sql.types.BinaryType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BinaryType</span></code></a></p></td>
<td><p>Binary (byte array) data type.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.BooleanType.html#pyspark.sql.types.BooleanType" title="pyspark.sql.types.BooleanType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BooleanType</span></code></a></p></td>
<td><p>Boolean data type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.ByteType.html#pyspark.sql.types.ByteType" title="pyspark.sql.types.ByteType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ByteType</span></code></a></p></td>
<td><p>Byte data type, i.e. a signed integer in a single byte.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.DataType.html#pyspark.sql.types.DataType" title="pyspark.sql.types.DataType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataType</span></code></a></p></td>
<td><p>Base class for data types.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.DateType.html#pyspark.sql.types.DateType" title="pyspark.sql.types.DateType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DateType</span></code></a></p></td>
<td><p>Date (datetime.date) data type.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.DecimalType.html#pyspark.sql.types.DecimalType" title="pyspark.sql.types.DecimalType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DecimalType</span></code></a>([precision, scale])</p></td>
<td><p>Decimal (decimal.Decimal) data type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.DoubleType.html#pyspark.sql.types.DoubleType" title="pyspark.sql.types.DoubleType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DoubleType</span></code></a></p></td>
<td><p>Double data type, representing double precision floats.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.FloatType.html#pyspark.sql.types.FloatType" title="pyspark.sql.types.FloatType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloatType</span></code></a></p></td>
<td><p>Float data type, representing single precision floats.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.IntegerType.html#pyspark.sql.types.IntegerType" title="pyspark.sql.types.IntegerType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IntegerType</span></code></a></p></td>
<td><p>Int data type, i.e. a signed 32-bit integer.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.LongType.html#pyspark.sql.types.LongType" title="pyspark.sql.types.LongType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LongType</span></code></a></p></td>
<td><p>Long data type, i.e. a signed 64-bit integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.MapType.html#pyspark.sql.types.MapType" title="pyspark.sql.types.MapType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MapType</span></code></a>(keyType, valueType[, valueContainsNull])</p></td>
<td><p>Map data type.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.NullType.html#pyspark.sql.types.NullType" title="pyspark.sql.types.NullType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NullType</span></code></a></p></td>
<td><p>Null type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.ShortType.html#pyspark.sql.types.ShortType" title="pyspark.sql.types.ShortType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ShortType</span></code></a></p></td>
<td><p>Short data type, i.e. a signed 16-bit integer.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.StringType.html#pyspark.sql.types.StringType" title="pyspark.sql.types.StringType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StringType</span></code></a></p></td>
<td><p>String data type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.StructField.html#pyspark.sql.types.StructField" title="pyspark.sql.types.StructField"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StructField</span></code></a>(name, dataType[, nullable, metadata])</p></td>
<td><p>A field in <a class="reference internal" href="api/pyspark.sql.types.StructType.html#pyspark.sql.types.StructType" title="pyspark.sql.types.StructType"><code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.types.StructType.html#pyspark.sql.types.StructType" title="pyspark.sql.types.StructType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StructType</span></code></a>([fields])</p></td>
<td><p>Struct type, consisting of a list of <a class="reference internal" href="api/pyspark.sql.types.StructField.html#pyspark.sql.types.StructField" title="pyspark.sql.types.StructField"><code class="xref py py-class docutils literal notranslate"><span class="pre">StructField</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.types.TimestampType.html#pyspark.sql.types.TimestampType" title="pyspark.sql.types.TimestampType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TimestampType</span></code></a></p></td>
<td><p>Timestamp (datetime.datetime) data type.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="row">
<h2>Row<a class="headerlink" href="#row" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Row.asDict.html#pyspark.sql.Row.asDict" title="pyspark.sql.Row.asDict"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Row.asDict</span></code></a>([recursive])</p></td>
<td><p>Return as a dict.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="functions">
<h2>Functions<a class="headerlink" href="#functions" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.abs.html#pyspark.sql.functions.abs" title="pyspark.sql.functions.abs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">abs</span></code></a>(col)</p></td>
<td><p>Computes the absolute value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.acos.html#pyspark.sql.functions.acos" title="pyspark.sql.functions.acos"><code class="xref py py-obj docutils literal notranslate"><span class="pre">acos</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.acosh.html#pyspark.sql.functions.acosh" title="pyspark.sql.functions.acosh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">acosh</span></code></a>(col)</p></td>
<td><p>Computes inverse hyperbolic cosine of the input column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.add_months.html#pyspark.sql.functions.add_months" title="pyspark.sql.functions.add_months"><code class="xref py py-obj docutils literal notranslate"><span class="pre">add_months</span></code></a>(start, months)</p></td>
<td><p>Returns the date that is <cite>months</cite> months after <cite>start</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.aggregate.html#pyspark.sql.functions.aggregate" title="pyspark.sql.functions.aggregate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">aggregate</span></code></a>(col, initialValue, merge[, finish])</p></td>
<td><p>Applies a binary operator to an initial state and all elements in the array, and reduces this to a single state.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.approxCountDistinct.html#pyspark.sql.functions.approxCountDistinct" title="pyspark.sql.functions.approxCountDistinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">approxCountDistinct</span></code></a>(col[, rsd])</p></td>
<td><div class="deprecated">
<p><span class="versionmodified deprecated">Deprecated since version 2.1.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.approx_count_distinct.html#pyspark.sql.functions.approx_count_distinct" title="pyspark.sql.functions.approx_count_distinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">approx_count_distinct</span></code></a>(col[, rsd])</p></td>
<td><p>Aggregate function: returns a new <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> for approximate distinct count of column <cite>col</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array.html#pyspark.sql.functions.array" title="pyspark.sql.functions.array"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array</span></code></a>(*cols)</p></td>
<td><p>Creates a new array column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_contains.html#pyspark.sql.functions.array_contains" title="pyspark.sql.functions.array_contains"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_contains</span></code></a>(col, value)</p></td>
<td><p>Collection function: returns null if the array is null, true if the array contains the given value, and false otherwise.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_distinct.html#pyspark.sql.functions.array_distinct" title="pyspark.sql.functions.array_distinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_distinct</span></code></a>(col)</p></td>
<td><p>Collection function: removes duplicate values from the array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_except.html#pyspark.sql.functions.array_except" title="pyspark.sql.functions.array_except"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_except</span></code></a>(col1, col2)</p></td>
<td><p>Collection function: returns an array of the elements in col1 but not in col2, without duplicates.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_intersect.html#pyspark.sql.functions.array_intersect" title="pyspark.sql.functions.array_intersect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_intersect</span></code></a>(col1, col2)</p></td>
<td><p>Collection function: returns an array of the elements in the intersection of col1 and col2, without duplicates.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_join.html#pyspark.sql.functions.array_join" title="pyspark.sql.functions.array_join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_join</span></code></a>(col, delimiter[, null_replacement])</p></td>
<td><p>Concatenates the elements of <cite>column</cite> using the <cite>delimiter</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_max.html#pyspark.sql.functions.array_max" title="pyspark.sql.functions.array_max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_max</span></code></a>(col)</p></td>
<td><p>Collection function: returns the maximum value of the array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_min.html#pyspark.sql.functions.array_min" title="pyspark.sql.functions.array_min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_min</span></code></a>(col)</p></td>
<td><p>Collection function: returns the minimum value of the array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_position.html#pyspark.sql.functions.array_position" title="pyspark.sql.functions.array_position"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_position</span></code></a>(col, value)</p></td>
<td><p>Collection function: Locates the position of the first occurrence of the given value in the given array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_remove.html#pyspark.sql.functions.array_remove" title="pyspark.sql.functions.array_remove"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_remove</span></code></a>(col, element)</p></td>
<td><p>Collection function: Remove all elements that are equal to element from the given array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_repeat.html#pyspark.sql.functions.array_repeat" title="pyspark.sql.functions.array_repeat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_repeat</span></code></a>(col, count)</p></td>
<td><p>Collection function: creates an array containing a column repeated count times.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_sort.html#pyspark.sql.functions.array_sort" title="pyspark.sql.functions.array_sort"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_sort</span></code></a>(col)</p></td>
<td><p>Collection function: sorts the input array in ascending order.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.array_union.html#pyspark.sql.functions.array_union" title="pyspark.sql.functions.array_union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">array_union</span></code></a>(col1, col2)</p></td>
<td><p>Collection function: returns an array of the elements in the union of col1 and col2, without duplicates.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.arrays_overlap.html#pyspark.sql.functions.arrays_overlap" title="pyspark.sql.functions.arrays_overlap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">arrays_overlap</span></code></a>(a1, a2)</p></td>
<td><p>Collection function: returns true if the arrays contain any common non-null element; if not, returns null if both the arrays are non-empty and any of them contains a null element; returns false otherwise.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.arrays_zip.html#pyspark.sql.functions.arrays_zip" title="pyspark.sql.functions.arrays_zip"><code class="xref py py-obj docutils literal notranslate"><span class="pre">arrays_zip</span></code></a>(*cols)</p></td>
<td><p>Collection function: Returns a merged array of structs in which the N-th struct contains all N-th values of input arrays.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.asc.html#pyspark.sql.functions.asc" title="pyspark.sql.functions.asc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">asc</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the ascending order of the given column name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.asc_nulls_first.html#pyspark.sql.functions.asc_nulls_first" title="pyspark.sql.functions.asc_nulls_first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">asc_nulls_first</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the ascending order of the given column name, and null values appear before non-null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.asc_nulls_last.html#pyspark.sql.functions.asc_nulls_last" title="pyspark.sql.functions.asc_nulls_last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">asc_nulls_last</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the ascending order of the given column name, and null values appear after non-null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.ascii.html#pyspark.sql.functions.ascii" title="pyspark.sql.functions.ascii"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ascii</span></code></a>(col)</p></td>
<td><p>Computes the numeric value of the first character of the string column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.asin.html#pyspark.sql.functions.asin" title="pyspark.sql.functions.asin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">asin</span></code></a>(col)</p></td>
<td><p>Computes inverse sine of the input column.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.3.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.asinh.html#pyspark.sql.functions.asinh" title="pyspark.sql.functions.asinh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">asinh</span></code></a>(col)</p></td>
<td><p>Computes inverse hyperbolic sine of the input column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.assert_true.html#pyspark.sql.functions.assert_true" title="pyspark.sql.functions.assert_true"><code class="xref py py-obj docutils literal notranslate"><span class="pre">assert_true</span></code></a>(col[, errMsg])</p></td>
<td><p>Returns null if the input column is true; throws an exception with the provided error message otherwise.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.atan.html#pyspark.sql.functions.atan" title="pyspark.sql.functions.atan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">atan</span></code></a>(col)</p></td>
<td><p>Computes inverse tangent of the input column.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.atanh.html#pyspark.sql.functions.atanh" title="pyspark.sql.functions.atanh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">atanh</span></code></a>(col)</p></td>
<td><p>Computes inverse hyperbolic tangent of the input column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.atan2.html#pyspark.sql.functions.atan2" title="pyspark.sql.functions.atan2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">atan2</span></code></a>(col1, col2)</p></td>
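<td><p>Computes the two-argument inverse tangent: the angle theta of the polar coordinates (r, theta) that correspond to the Cartesian point whose y-coordinate is <code class="docutils literal notranslate"><span class="pre">col1</span></code> and x-coordinate is <code class="docutils literal notranslate"><span class="pre">col2</span></code>.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>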
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.avg.html#pyspark.sql.functions.avg" title="pyspark.sql.functions.avg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">avg</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the average of the values in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.base64.html#pyspark.sql.functions.base64" title="pyspark.sql.functions.base64"><code class="xref py py-obj docutils literal notranslate"><span class="pre">base64</span></code></a>(col)</p></td>
<td><p>Computes the BASE64 encoding of a binary column and returns it as a string column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.bin.html#pyspark.sql.functions.bin" title="pyspark.sql.functions.bin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">bin</span></code></a>(col)</p></td>
<td><p>Returns the string representation of the binary value of the given column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.bitwiseNOT.html#pyspark.sql.functions.bitwiseNOT" title="pyspark.sql.functions.bitwiseNOT"><code class="xref py py-obj docutils literal notranslate"><span class="pre">bitwiseNOT</span></code></a>(col)</p></td>
<td><p>Computes bitwise not.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.broadcast.html#pyspark.sql.functions.broadcast" title="pyspark.sql.functions.broadcast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">broadcast</span></code></a>(df)</p></td>
<td><p>Marks a DataFrame as small enough for use in broadcast joins.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.bround.html#pyspark.sql.functions.bround" title="pyspark.sql.functions.bround"><code class="xref py py-obj docutils literal notranslate"><span class="pre">bround</span></code></a>(col[, scale])</p></td>
<td><p>Round the given value to <cite>scale</cite> decimal places using HALF_EVEN rounding mode if <cite>scale</cite> &gt;= 0 or at integral part when <cite>scale</cite> &lt; 0.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.bucket.html#pyspark.sql.functions.bucket" title="pyspark.sql.functions.bucket"><code class="xref py py-obj docutils literal notranslate"><span class="pre">bucket</span></code></a>(numBuckets, col)</p></td>
<td><p>Partition transform function: A transform for any type that partitions by a hash of the input column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.cbrt.html#pyspark.sql.functions.cbrt" title="pyspark.sql.functions.cbrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cbrt</span></code></a>(col)</p></td>
<td><p>Computes the cube-root of the given value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.ceil.html#pyspark.sql.functions.ceil" title="pyspark.sql.functions.ceil"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ceil</span></code></a>(col)</p></td>
<td><p>Computes the ceiling of the given value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.coalesce.html#pyspark.sql.functions.coalesce" title="pyspark.sql.functions.coalesce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">coalesce</span></code></a>(*cols)</p></td>
<td><p>Returns the first column that is not null.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.col.html#pyspark.sql.functions.col" title="pyspark.sql.functions.col"><code class="xref py py-obj docutils literal notranslate"><span class="pre">col</span></code></a>(col)</p></td>
<td><p>Returns a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> based on the given column name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.collect_list.html#pyspark.sql.functions.collect_list" title="pyspark.sql.functions.collect_list"><code class="xref py py-obj docutils literal notranslate"><span class="pre">collect_list</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns a list of objects with duplicates.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.collect_set.html#pyspark.sql.functions.collect_set" title="pyspark.sql.functions.collect_set"><code class="xref py py-obj docutils literal notranslate"><span class="pre">collect_set</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns a set of objects with duplicate elements eliminated.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.column.html#pyspark.sql.functions.column" title="pyspark.sql.functions.column"><code class="xref py py-obj docutils literal notranslate"><span class="pre">column</span></code></a>(col)</p></td>
<td><p>Returns a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> based on the given column name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.concat.html#pyspark.sql.functions.concat" title="pyspark.sql.functions.concat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">concat</span></code></a>(*cols)</p></td>
<td><p>Concatenates multiple input columns together into a single column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.concat_ws.html#pyspark.sql.functions.concat_ws" title="pyspark.sql.functions.concat_ws"><code class="xref py py-obj docutils literal notranslate"><span class="pre">concat_ws</span></code></a>(sep, *cols)</p></td>
<td><p>Concatenates multiple input string columns together into a single string column, using the given separator.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.conv.html#pyspark.sql.functions.conv" title="pyspark.sql.functions.conv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">conv</span></code></a>(col, fromBase, toBase)</p></td>
<td><p>Convert a number in a string column from one base to another.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.corr.html#pyspark.sql.functions.corr" title="pyspark.sql.functions.corr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">corr</span></code></a>(col1, col2)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> for the Pearson Correlation Coefficient for <code class="docutils literal notranslate"><span class="pre">col1</span></code> and <code class="docutils literal notranslate"><span class="pre">col2</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.cos.html#pyspark.sql.functions.cos" title="pyspark.sql.functions.cos"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cos</span></code></a>(col)</p></td>
<td><p>Computes cosine of the input column.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.cosh.html#pyspark.sql.functions.cosh" title="pyspark.sql.functions.cosh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cosh</span></code></a>(col)</p></td>
<td><p>Computes hyperbolic cosine of the input column.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.count.html#pyspark.sql.functions.count" title="pyspark.sql.functions.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">count</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the number of items in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.countDistinct.html#pyspark.sql.functions.countDistinct" title="pyspark.sql.functions.countDistinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">countDistinct</span></code></a>(col, *cols)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> for distinct count of <code class="docutils literal notranslate"><span class="pre">col</span></code> or <code class="docutils literal notranslate"><span class="pre">cols</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.covar_pop.html#pyspark.sql.functions.covar_pop" title="pyspark.sql.functions.covar_pop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">covar_pop</span></code></a>(col1, col2)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> for the population covariance of <code class="docutils literal notranslate"><span class="pre">col1</span></code> and <code class="docutils literal notranslate"><span class="pre">col2</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.covar_samp.html#pyspark.sql.functions.covar_samp" title="pyspark.sql.functions.covar_samp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">covar_samp</span></code></a>(col1, col2)</p></td>
<td><p>Returns a new <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> for the sample covariance of <code class="docutils literal notranslate"><span class="pre">col1</span></code> and <code class="docutils literal notranslate"><span class="pre">col2</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.crc32.html#pyspark.sql.functions.crc32" title="pyspark.sql.functions.crc32"><code class="xref py py-obj docutils literal notranslate"><span class="pre">crc32</span></code></a>(col)</p></td>
<td><p>Calculates the cyclic redundancy check value (CRC32) of a binary column and returns the value as a bigint.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.create_map.html#pyspark.sql.functions.create_map" title="pyspark.sql.functions.create_map"><code class="xref py py-obj docutils literal notranslate"><span class="pre">create_map</span></code></a>(*cols)</p></td>
<td><p>Creates a new map column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.cume_dist.html#pyspark.sql.functions.cume_dist" title="pyspark.sql.functions.cume_dist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">cume_dist</span></code></a>()</p></td>
<td><p>Window function: returns the cumulative distribution of values within a window partition, i.e. the fraction of rows that are below the current row.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.current_date.html#pyspark.sql.functions.current_date" title="pyspark.sql.functions.current_date"><code class="xref py py-obj docutils literal notranslate"><span class="pre">current_date</span></code></a>()</p></td>
<td><p>Returns the current date at the start of query evaluation as a <code class="xref py py-class docutils literal notranslate"><span class="pre">DateType</span></code> column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.current_timestamp.html#pyspark.sql.functions.current_timestamp" title="pyspark.sql.functions.current_timestamp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">current_timestamp</span></code></a>()</p></td>
<td><p>Returns the current timestamp at the start of query evaluation as a <code class="xref py py-class docutils literal notranslate"><span class="pre">TimestampType</span></code> column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.date_add.html#pyspark.sql.functions.date_add" title="pyspark.sql.functions.date_add"><code class="xref py py-obj docutils literal notranslate"><span class="pre">date_add</span></code></a>(start, days)</p></td>
<td><p>Returns the date that is <cite>days</cite> days after <cite>start</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.date_format.html#pyspark.sql.functions.date_format" title="pyspark.sql.functions.date_format"><code class="xref py py-obj docutils literal notranslate"><span class="pre">date_format</span></code></a>(date, format)</p></td>
<td><p>Converts a date/timestamp/string to a value of string in the format specified by the date format given by the second argument.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.date_sub.html#pyspark.sql.functions.date_sub" title="pyspark.sql.functions.date_sub"><code class="xref py py-obj docutils literal notranslate"><span class="pre">date_sub</span></code></a>(start, days)</p></td>
<td><p>Returns the date that is <cite>days</cite> days before <cite>start</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.date_trunc.html#pyspark.sql.functions.date_trunc" title="pyspark.sql.functions.date_trunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">date_trunc</span></code></a>(format, timestamp)</p></td>
<td><p>Returns timestamp truncated to the unit specified by the format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.datediff.html#pyspark.sql.functions.datediff" title="pyspark.sql.functions.datediff"><code class="xref py py-obj docutils literal notranslate"><span class="pre">datediff</span></code></a>(end, start)</p></td>
<td><p>Returns the number of days from <cite>start</cite> to <cite>end</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.dayofmonth.html#pyspark.sql.functions.dayofmonth" title="pyspark.sql.functions.dayofmonth"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dayofmonth</span></code></a>(col)</p></td>
<td><p>Extract the day of the month of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.dayofweek.html#pyspark.sql.functions.dayofweek" title="pyspark.sql.functions.dayofweek"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dayofweek</span></code></a>(col)</p></td>
<td><p>Extract the day of the week of a given date as integer.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.dayofyear.html#pyspark.sql.functions.dayofyear" title="pyspark.sql.functions.dayofyear"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dayofyear</span></code></a>(col)</p></td>
<td><p>Extract the day of the year of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.days.html#pyspark.sql.functions.days" title="pyspark.sql.functions.days"><code class="xref py py-obj docutils literal notranslate"><span class="pre">days</span></code></a>(col)</p></td>
<td><p>Partition transform function: A transform for timestamps and dates to partition data into days.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.decode.html#pyspark.sql.functions.decode" title="pyspark.sql.functions.decode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">decode</span></code></a>(col, charset)</p></td>
<td><p>Decodes the first argument from a binary into a string using the provided character set (one of ‘US-ASCII’, ‘ISO-8859-1’, ‘UTF-8’, ‘UTF-16BE’, ‘UTF-16LE’, ‘UTF-16’).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.degrees.html#pyspark.sql.functions.degrees" title="pyspark.sql.functions.degrees"><code class="xref py py-obj docutils literal notranslate"><span class="pre">degrees</span></code></a>(col)</p></td>
<td><p>Converts an angle measured in radians to an approximately equivalent angle measured in degrees.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.dense_rank.html#pyspark.sql.functions.dense_rank" title="pyspark.sql.functions.dense_rank"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dense_rank</span></code></a>()</p></td>
<td><p>Window function: returns the rank of rows within a window partition, without any gaps.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.desc.html#pyspark.sql.functions.desc" title="pyspark.sql.functions.desc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">desc</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the descending order of the given column name.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.desc_nulls_first.html#pyspark.sql.functions.desc_nulls_first" title="pyspark.sql.functions.desc_nulls_first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">desc_nulls_first</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the descending order of the given column name, and null values appear before non-null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.desc_nulls_last.html#pyspark.sql.functions.desc_nulls_last" title="pyspark.sql.functions.desc_nulls_last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">desc_nulls_last</span></code></a>(col)</p></td>
<td><p>Returns a sort expression based on the descending order of the given column name, and null values appear after non-null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.element_at.html#pyspark.sql.functions.element_at" title="pyspark.sql.functions.element_at"><code class="xref py py-obj docutils literal notranslate"><span class="pre">element_at</span></code></a>(col, extraction)</p></td>
<td><p>Collection function: Returns element of array at given index in extraction if col is array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.encode.html#pyspark.sql.functions.encode" title="pyspark.sql.functions.encode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">encode</span></code></a>(col, charset)</p></td>
<td><p>Encodes the first argument from a string into a binary using the provided character set (one of ‘US-ASCII’, ‘ISO-8859-1’, ‘UTF-8’, ‘UTF-16BE’, ‘UTF-16LE’, ‘UTF-16’).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.exists.html#pyspark.sql.functions.exists" title="pyspark.sql.functions.exists"><code class="xref py py-obj docutils literal notranslate"><span class="pre">exists</span></code></a>(col, f)</p></td>
<td><p>Returns whether a predicate holds for one or more elements in the array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.exp.html#pyspark.sql.functions.exp" title="pyspark.sql.functions.exp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">exp</span></code></a>(col)</p></td>
<td><p>Computes the exponential of the given value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.explode.html#pyspark.sql.functions.explode" title="pyspark.sql.functions.explode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">explode</span></code></a>(col)</p></td>
<td><p>Returns a new row for each element in the given array or map.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.explode_outer.html#pyspark.sql.functions.explode_outer" title="pyspark.sql.functions.explode_outer"><code class="xref py py-obj docutils literal notranslate"><span class="pre">explode_outer</span></code></a>(col)</p></td>
<td><p>Returns a new row for each element in the given array or map. Unlike explode, if the array/map is null or empty then null is produced.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.expm1.html#pyspark.sql.functions.expm1" title="pyspark.sql.functions.expm1"><code class="xref py py-obj docutils literal notranslate"><span class="pre">expm1</span></code></a>(col)</p></td>
<td><p>Computes the exponential of the given value minus one.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.expr.html#pyspark.sql.functions.expr" title="pyspark.sql.functions.expr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">expr</span></code></a>(str)</p></td>
<td><p>Parses the expression string into the column that it represents.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.factorial.html#pyspark.sql.functions.factorial" title="pyspark.sql.functions.factorial"><code class="xref py py-obj docutils literal notranslate"><span class="pre">factorial</span></code></a>(col)</p></td>
<td><p>Computes the factorial of the given value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.filter.html#pyspark.sql.functions.filter" title="pyspark.sql.functions.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">filter</span></code></a>(col, f)</p></td>
<td><p>Returns an array of elements for which a predicate holds in a given array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.first.html#pyspark.sql.functions.first" title="pyspark.sql.functions.first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">first</span></code></a>(col[, ignorenulls])</p></td>
<td><p>Aggregate function: returns the first value in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.flatten.html#pyspark.sql.functions.flatten" title="pyspark.sql.functions.flatten"><code class="xref py py-obj docutils literal notranslate"><span class="pre">flatten</span></code></a>(col)</p></td>
<td><p>Collection function: creates a single array from an array of arrays.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.floor.html#pyspark.sql.functions.floor" title="pyspark.sql.functions.floor"><code class="xref py py-obj docutils literal notranslate"><span class="pre">floor</span></code></a>(col)</p></td>
<td><p>Computes the floor of the given value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.forall.html#pyspark.sql.functions.forall" title="pyspark.sql.functions.forall"><code class="xref py py-obj docutils literal notranslate"><span class="pre">forall</span></code></a>(col, f)</p></td>
<td><p>Returns whether a predicate holds for every element in the array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.format_number.html#pyspark.sql.functions.format_number" title="pyspark.sql.functions.format_number"><code class="xref py py-obj docutils literal notranslate"><span class="pre">format_number</span></code></a>(col, d)</p></td>
<td><p>Formats the number X to a format like ‘#,###,###.##’, rounded to d decimal places with HALF_EVEN round mode, and returns the result as a string.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.format_string.html#pyspark.sql.functions.format_string" title="pyspark.sql.functions.format_string"><code class="xref py py-obj docutils literal notranslate"><span class="pre">format_string</span></code></a>(format, *cols)</p></td>
<td><p>Formats the arguments in printf-style and returns the result as a string column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.from_csv.html#pyspark.sql.functions.from_csv" title="pyspark.sql.functions.from_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_csv</span></code></a>(col, schema[, options])</p></td>
<td><p>Parses a column containing a CSV string to a row with the specified schema.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.from_json.html#pyspark.sql.functions.from_json" title="pyspark.sql.functions.from_json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_json</span></code></a>(col, schema[, options])</p></td>
<td><p>Parses a column containing a JSON string into a <code class="xref py py-class docutils literal notranslate"><span class="pre">MapType</span></code> with <code class="xref py py-class docutils literal notranslate"><span class="pre">StringType</span></code> as keys type, <code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code> or <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrayType</span></code> with the specified schema.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.from_unixtime.html#pyspark.sql.functions.from_unixtime" title="pyspark.sql.functions.from_unixtime"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_unixtime</span></code></a>(timestamp[, format])</p></td>
<td><p>Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string representing the timestamp of that moment in the current system time zone in the given format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.from_utc_timestamp.html#pyspark.sql.functions.from_utc_timestamp" title="pyspark.sql.functions.from_utc_timestamp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_utc_timestamp</span></code></a>(timestamp, tz)</p></td>
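<td><p>This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. It takes a timezone-agnostic timestamp, interprets it as a timestamp in UTC, and renders it as a timestamp in the given time zone.</p></td>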
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.get_json_object.html#pyspark.sql.functions.get_json_object" title="pyspark.sql.functions.get_json_object"><code class="xref py py-obj docutils literal notranslate"><span class="pre">get_json_object</span></code></a>(col, path)</p></td>
<td><p>Extracts json object from a json string based on json path specified, and returns json string of the extracted json object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.greatest.html#pyspark.sql.functions.greatest" title="pyspark.sql.functions.greatest"><code class="xref py py-obj docutils literal notranslate"><span class="pre">greatest</span></code></a>(*cols)</p></td>
<td><p>Returns the greatest value of the list of column names, skipping null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.grouping.html#pyspark.sql.functions.grouping" title="pyspark.sql.functions.grouping"><code class="xref py py-obj docutils literal notranslate"><span class="pre">grouping</span></code></a>(col)</p></td>
<td><p>Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.grouping_id.html#pyspark.sql.functions.grouping_id" title="pyspark.sql.functions.grouping_id"><code class="xref py py-obj docutils literal notranslate"><span class="pre">grouping_id</span></code></a>(*cols)</p></td>
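<td><p>Aggregate function: returns the level of grouping, equal to <code class="docutils literal notranslate"><span class="pre">(grouping(c1)</span> <span class="pre">&lt;&lt;</span> <span class="pre">(n-1))</span> <span class="pre">+</span> <span class="pre">(grouping(c2)</span> <span class="pre">&lt;&lt;</span> <span class="pre">(n-2))</span> <span class="pre">+</span> <span class="pre">...</span> <span class="pre">+</span> <span class="pre">grouping(cn)</span></code>.</p></td>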
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.hash.html#pyspark.sql.functions.hash" title="pyspark.sql.functions.hash"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hash</span></code></a>(*cols)</p></td>
<td><p>Calculates the hash code of given columns, and returns the result as an int column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.hex.html#pyspark.sql.functions.hex" title="pyspark.sql.functions.hex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hex</span></code></a>(col)</p></td>
<td><p>Computes hex value of the given column, which could be <a class="reference internal" href="api/pyspark.sql.types.StringType.html#pyspark.sql.types.StringType" title="pyspark.sql.types.StringType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.StringType</span></code></a>, <a class="reference internal" href="api/pyspark.sql.types.BinaryType.html#pyspark.sql.types.BinaryType" title="pyspark.sql.types.BinaryType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.BinaryType</span></code></a>, <a class="reference internal" href="api/pyspark.sql.types.IntegerType.html#pyspark.sql.types.IntegerType" title="pyspark.sql.types.IntegerType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.IntegerType</span></code></a> or <a class="reference internal" href="api/pyspark.sql.types.LongType.html#pyspark.sql.types.LongType" title="pyspark.sql.types.LongType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.LongType</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.hour.html#pyspark.sql.functions.hour" title="pyspark.sql.functions.hour"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hour</span></code></a>(col)</p></td>
<td><p>Extract the hours of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.hours.html#pyspark.sql.functions.hours" title="pyspark.sql.functions.hours"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hours</span></code></a>(col)</p></td>
<td><p>Partition transform function: A transform for timestamps to partition data into hours.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.hypot.html#pyspark.sql.functions.hypot" title="pyspark.sql.functions.hypot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hypot</span></code></a>(col1, col2)</p></td>
<td><p>Computes <code class="docutils literal notranslate"><span class="pre">sqrt(a^2</span> <span class="pre">+</span> <span class="pre">b^2)</span></code> without intermediate overflow or underflow.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.initcap.html#pyspark.sql.functions.initcap" title="pyspark.sql.functions.initcap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">initcap</span></code></a>(col)</p></td>
<td><p>Translate the first letter of each word to upper case in the sentence.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.input_file_name.html#pyspark.sql.functions.input_file_name" title="pyspark.sql.functions.input_file_name"><code class="xref py py-obj docutils literal notranslate"><span class="pre">input_file_name</span></code></a>()</p></td>
<td><p>Creates a string column for the file name of the current Spark task.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.instr.html#pyspark.sql.functions.instr" title="pyspark.sql.functions.instr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">instr</span></code></a>(str, substr)</p></td>
<td><p>Locate the position of the first occurrence of substr column in the given string.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.isnan.html#pyspark.sql.functions.isnan" title="pyspark.sql.functions.isnan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isnan</span></code></a>(col)</p></td>
<td><p>An expression that returns true iff the column is NaN.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.isnull.html#pyspark.sql.functions.isnull" title="pyspark.sql.functions.isnull"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isnull</span></code></a>(col)</p></td>
<td><p>An expression that returns true iff the column is null.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.json_tuple.html#pyspark.sql.functions.json_tuple" title="pyspark.sql.functions.json_tuple"><code class="xref py py-obj docutils literal notranslate"><span class="pre">json_tuple</span></code></a>(col, *fields)</p></td>
<td><p>Creates a new row for a json column according to the given field names.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.kurtosis.html#pyspark.sql.functions.kurtosis" title="pyspark.sql.functions.kurtosis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">kurtosis</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the kurtosis of the values in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.lag.html#pyspark.sql.functions.lag" title="pyspark.sql.functions.lag"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lag</span></code></a>(col[, offset, default])</p></td>
<td><p>Window function: returns the value that is <cite>offset</cite> rows before the current row, and <cite>default</cite> if there is less than <cite>offset</cite> rows before the current row.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.last.html#pyspark.sql.functions.last" title="pyspark.sql.functions.last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">last</span></code></a>(col[, ignorenulls])</p></td>
<td><p>Aggregate function: returns the last value in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.last_day.html#pyspark.sql.functions.last_day" title="pyspark.sql.functions.last_day"><code class="xref py py-obj docutils literal notranslate"><span class="pre">last_day</span></code></a>(date)</p></td>
<td><p>Returns the last day of the month which the given date belongs to.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.lead.html#pyspark.sql.functions.lead" title="pyspark.sql.functions.lead"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lead</span></code></a>(col[, offset, default])</p></td>
<td><p>Window function: returns the value that is <cite>offset</cite> rows after the current row, and <cite>default</cite> if there is less than <cite>offset</cite> rows after the current row.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.least.html#pyspark.sql.functions.least" title="pyspark.sql.functions.least"><code class="xref py py-obj docutils literal notranslate"><span class="pre">least</span></code></a>(*cols)</p></td>
<td><p>Returns the least value of the list of column names, skipping null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.length.html#pyspark.sql.functions.length" title="pyspark.sql.functions.length"><code class="xref py py-obj docutils literal notranslate"><span class="pre">length</span></code></a>(col)</p></td>
<td><p>Computes the character length of string data or number of bytes of binary data.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.levenshtein.html#pyspark.sql.functions.levenshtein" title="pyspark.sql.functions.levenshtein"><code class="xref py py-obj docutils literal notranslate"><span class="pre">levenshtein</span></code></a>(left, right)</p></td>
<td><p>Computes the Levenshtein distance of the two given strings.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.lit.html#pyspark.sql.functions.lit" title="pyspark.sql.functions.lit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lit</span></code></a>(col)</p></td>
<td><p>Creates a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> of literal value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.locate.html#pyspark.sql.functions.locate" title="pyspark.sql.functions.locate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">locate</span></code></a>(substr, str[, pos])</p></td>
<td><p>Locate the position of the first occurrence of substr in a string column, after position pos.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.log.html#pyspark.sql.functions.log" title="pyspark.sql.functions.log"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log</span></code></a>(arg1[, arg2])</p></td>
<td><p>Returns the first argument-based logarithm of the second argument.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.log10.html#pyspark.sql.functions.log10" title="pyspark.sql.functions.log10"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log10</span></code></a>(col)</p></td>
<td><p>Computes the logarithm of the given value in Base 10.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.log1p.html#pyspark.sql.functions.log1p" title="pyspark.sql.functions.log1p"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log1p</span></code></a>(col)</p></td>
<td><p>Computes the natural logarithm of the given value plus one.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.log2.html#pyspark.sql.functions.log2" title="pyspark.sql.functions.log2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log2</span></code></a>(col)</p></td>
<td><p>Returns the base-2 logarithm of the argument.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.lower.html#pyspark.sql.functions.lower" title="pyspark.sql.functions.lower"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lower</span></code></a>(col)</p></td>
<td><p>Converts a string expression to lower case.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.lpad.html#pyspark.sql.functions.lpad" title="pyspark.sql.functions.lpad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lpad</span></code></a>(col, len, pad)</p></td>
<td><p>Left-pad the string column to width <cite>len</cite> with <cite>pad</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.ltrim.html#pyspark.sql.functions.ltrim" title="pyspark.sql.functions.ltrim"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ltrim</span></code></a>(col)</p></td>
<td><p>Trim the spaces from left end for the specified string value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_concat.html#pyspark.sql.functions.map_concat" title="pyspark.sql.functions.map_concat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_concat</span></code></a>(*cols)</p></td>
<td><p>Returns the union of all the given maps.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_entries.html#pyspark.sql.functions.map_entries" title="pyspark.sql.functions.map_entries"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_entries</span></code></a>(col)</p></td>
<td><p>Collection function: Returns an unordered array of all entries in the given map.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_filter.html#pyspark.sql.functions.map_filter" title="pyspark.sql.functions.map_filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_filter</span></code></a>(col, f)</p></td>
<td><p>Returns a map whose key-value pairs satisfy a predicate.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_from_arrays.html#pyspark.sql.functions.map_from_arrays" title="pyspark.sql.functions.map_from_arrays"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_from_arrays</span></code></a>(col1, col2)</p></td>
<td><p>Creates a new map from two arrays.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_from_entries.html#pyspark.sql.functions.map_from_entries" title="pyspark.sql.functions.map_from_entries"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_from_entries</span></code></a>(col)</p></td>
<td><p>Collection function: Returns a map created from the given array of entries.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_keys.html#pyspark.sql.functions.map_keys" title="pyspark.sql.functions.map_keys"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_keys</span></code></a>(col)</p></td>
<td><p>Collection function: Returns an unordered array containing the keys of the map.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_values.html#pyspark.sql.functions.map_values" title="pyspark.sql.functions.map_values"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_values</span></code></a>(col)</p></td>
<td><p>Collection function: Returns an unordered array containing the values of the map.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.map_zip_with.html#pyspark.sql.functions.map_zip_with" title="pyspark.sql.functions.map_zip_with"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_zip_with</span></code></a>(col1, col2, f)</p></td>
<td><p>Merge two given maps, key-wise into a single map using a function.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.max.html#pyspark.sql.functions.max" title="pyspark.sql.functions.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the maximum value of the expression in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.md5.html#pyspark.sql.functions.md5" title="pyspark.sql.functions.md5"><code class="xref py py-obj docutils literal notranslate"><span class="pre">md5</span></code></a>(col)</p></td>
<td><p>Calculates the MD5 digest and returns the value as a 32 character hex string.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.mean.html#pyspark.sql.functions.mean" title="pyspark.sql.functions.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the average of the values in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.min.html#pyspark.sql.functions.min" title="pyspark.sql.functions.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">min</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the minimum value of the expression in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.minute.html#pyspark.sql.functions.minute" title="pyspark.sql.functions.minute"><code class="xref py py-obj docutils literal notranslate"><span class="pre">minute</span></code></a>(col)</p></td>
<td><p>Extract the minutes of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.monotonically_increasing_id.html#pyspark.sql.functions.monotonically_increasing_id" title="pyspark.sql.functions.monotonically_increasing_id"><code class="xref py py-obj docutils literal notranslate"><span class="pre">monotonically_increasing_id</span></code></a>()</p></td>
<td><p>A column that generates monotonically increasing 64-bit integers.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.month.html#pyspark.sql.functions.month" title="pyspark.sql.functions.month"><code class="xref py py-obj docutils literal notranslate"><span class="pre">month</span></code></a>(col)</p></td>
<td><p>Extract the month of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.months.html#pyspark.sql.functions.months" title="pyspark.sql.functions.months"><code class="xref py py-obj docutils literal notranslate"><span class="pre">months</span></code></a>(col)</p></td>
<td><p>Partition transform function: A transform for timestamps and dates to partition data into months.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.months_between.html#pyspark.sql.functions.months_between" title="pyspark.sql.functions.months_between"><code class="xref py py-obj docutils literal notranslate"><span class="pre">months_between</span></code></a>(date1, date2[, roundOff])</p></td>
<td><p>Returns number of months between dates date1 and date2.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.nanvl.html#pyspark.sql.functions.nanvl" title="pyspark.sql.functions.nanvl"><code class="xref py py-obj docutils literal notranslate"><span class="pre">nanvl</span></code></a>(col1, col2)</p></td>
<td><p>Returns col1 if it is not NaN, or col2 if col1 is NaN.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.next_day.html#pyspark.sql.functions.next_day" title="pyspark.sql.functions.next_day"><code class="xref py py-obj docutils literal notranslate"><span class="pre">next_day</span></code></a>(date, dayOfWeek)</p></td>
<td><p>Returns the first date which is later than the value of the date column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.nth_value.html#pyspark.sql.functions.nth_value" title="pyspark.sql.functions.nth_value"><code class="xref py py-obj docutils literal notranslate"><span class="pre">nth_value</span></code></a>(col, offset[, ignoreNulls])</p></td>
<td><p>Window function: returns the value that is the <cite>offset</cite>th row of the window frame (counting from 1), and <cite>null</cite> if the size of window frame is less than <cite>offset</cite> rows.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.ntile.html#pyspark.sql.functions.ntile" title="pyspark.sql.functions.ntile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ntile</span></code></a>(n)</p></td>
<td><p>Window function: returns the ntile group id (from 1 to <cite>n</cite> inclusive) in an ordered window partition.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.overlay.html#pyspark.sql.functions.overlay" title="pyspark.sql.functions.overlay"><code class="xref py py-obj docutils literal notranslate"><span class="pre">overlay</span></code></a>(src, replace, pos[, len])</p></td>
<td><p>Overlay the specified portion of <cite>src</cite> with <cite>replace</cite>, starting from byte position <cite>pos</cite> of <cite>src</cite> and proceeding for <cite>len</cite> bytes.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.pandas_udf.html#pyspark.sql.functions.pandas_udf" title="pyspark.sql.functions.pandas_udf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pandas_udf</span></code></a>([f, returnType, functionType])</p></td>
<td><p>Creates a pandas user defined function (a.k.a. vectorized user defined function).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.percent_rank.html#pyspark.sql.functions.percent_rank" title="pyspark.sql.functions.percent_rank"><code class="xref py py-obj docutils literal notranslate"><span class="pre">percent_rank</span></code></a>()</p></td>
<td><p>Window function: returns the relative rank (i.e. percentile) of rows within a window partition.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.percentile_approx.html#pyspark.sql.functions.percentile_approx" title="pyspark.sql.functions.percentile_approx"><code class="xref py py-obj docutils literal notranslate"><span class="pre">percentile_approx</span></code></a>(col, percentage[, accuracy])</p></td>
<td><p>Returns the approximate <cite>percentile</cite> of the numeric column <cite>col</cite> which is the smallest value in the ordered <cite>col</cite> values (sorted from least to greatest) such that no more than <cite>percentage</cite> of <cite>col</cite> values is less than the value or equal to that value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.posexplode.html#pyspark.sql.functions.posexplode" title="pyspark.sql.functions.posexplode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">posexplode</span></code></a>(col)</p></td>
<td><p>Returns a new row for each element with position in the given array or map.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.posexplode_outer.html#pyspark.sql.functions.posexplode_outer" title="pyspark.sql.functions.posexplode_outer"><code class="xref py py-obj docutils literal notranslate"><span class="pre">posexplode_outer</span></code></a>(col)</p></td>
<td><p>Returns a new row for each element with position in the given array or map.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.pow.html#pyspark.sql.functions.pow" title="pyspark.sql.functions.pow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pow</span></code></a>(col1, col2)</p></td>
<td><p>Returns the value of the first argument raised to the power of the second argument.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.quarter.html#pyspark.sql.functions.quarter" title="pyspark.sql.functions.quarter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">quarter</span></code></a>(col)</p></td>
<td><p>Extract the quarter of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.radians.html#pyspark.sql.functions.radians" title="pyspark.sql.functions.radians"><code class="xref py py-obj docutils literal notranslate"><span class="pre">radians</span></code></a>(col)</p></td>
<td><p>Converts an angle measured in degrees to an approximately equivalent angle measured in radians.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.raise_error.html#pyspark.sql.functions.raise_error" title="pyspark.sql.functions.raise_error"><code class="xref py py-obj docutils literal notranslate"><span class="pre">raise_error</span></code></a>(errMsg)</p></td>
<td><p>Throws an exception with the provided error message.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.rand.html#pyspark.sql.functions.rand" title="pyspark.sql.functions.rand"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rand</span></code></a>([seed])</p></td>
<td><p>Generates a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.randn.html#pyspark.sql.functions.randn" title="pyspark.sql.functions.randn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">randn</span></code></a>([seed])</p></td>
<td><p>Generates a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.rank.html#pyspark.sql.functions.rank" title="pyspark.sql.functions.rank"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rank</span></code></a>()</p></td>
<td><p>Window function: returns the rank of rows within a window partition.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.regexp_extract.html#pyspark.sql.functions.regexp_extract" title="pyspark.sql.functions.regexp_extract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">regexp_extract</span></code></a>(str, pattern, idx)</p></td>
<td><p>Extract a specific group matched by a Java regex, from the specified string column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.regexp_replace.html#pyspark.sql.functions.regexp_replace" title="pyspark.sql.functions.regexp_replace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">regexp_replace</span></code></a>(str, pattern, replacement)</p></td>
<td><p>Replace all substrings of the specified string value that match <cite>pattern</cite> with <cite>replacement</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.repeat.html#pyspark.sql.functions.repeat" title="pyspark.sql.functions.repeat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">repeat</span></code></a>(col, n)</p></td>
<td><p>Repeats a string column n times, and returns it as a new string column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.reverse.html#pyspark.sql.functions.reverse" title="pyspark.sql.functions.reverse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reverse</span></code></a>(col)</p></td>
<td><p>Collection function: returns a reversed string or an array with reverse order of elements.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.rint.html#pyspark.sql.functions.rint" title="pyspark.sql.functions.rint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rint</span></code></a>(col)</p></td>
<td><p>Returns the double value that is closest in value to the argument and is equal to a mathematical integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.round.html#pyspark.sql.functions.round" title="pyspark.sql.functions.round"><code class="xref py py-obj docutils literal notranslate"><span class="pre">round</span></code></a>(col[, scale])</p></td>
<td><p>Round the given value to <cite>scale</cite> decimal places using HALF_UP rounding mode if <cite>scale</cite> &gt;= 0 or at integral part when <cite>scale</cite> &lt; 0.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.row_number.html#pyspark.sql.functions.row_number" title="pyspark.sql.functions.row_number"><code class="xref py py-obj docutils literal notranslate"><span class="pre">row_number</span></code></a>()</p></td>
<td><p>Window function: returns a sequential number starting at 1 within a window partition.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.rpad.html#pyspark.sql.functions.rpad" title="pyspark.sql.functions.rpad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rpad</span></code></a>(col, len, pad)</p></td>
<td><p>Right-pad the string column to width <cite>len</cite> with <cite>pad</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.rtrim.html#pyspark.sql.functions.rtrim" title="pyspark.sql.functions.rtrim"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rtrim</span></code></a>(col)</p></td>
<td><p>Trim the spaces from right end for the specified string value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.schema_of_csv.html#pyspark.sql.functions.schema_of_csv" title="pyspark.sql.functions.schema_of_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">schema_of_csv</span></code></a>(csv[, options])</p></td>
<td><p>Parses a CSV string and infers its schema in DDL format.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.schema_of_json.html#pyspark.sql.functions.schema_of_json" title="pyspark.sql.functions.schema_of_json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">schema_of_json</span></code></a>(json[, options])</p></td>
<td><p>Parses a JSON string and infers its schema in DDL format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.second.html#pyspark.sql.functions.second" title="pyspark.sql.functions.second"><code class="xref py py-obj docutils literal notranslate"><span class="pre">second</span></code></a>(col)</p></td>
<td><p>Extract the seconds of a given date as integer.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sequence.html#pyspark.sql.functions.sequence" title="pyspark.sql.functions.sequence"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sequence</span></code></a>(start, stop[, step])</p></td>
<td><p>Generate a sequence of integers from <cite>start</cite> to <cite>stop</cite>, incrementing by <cite>step</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sha1.html#pyspark.sql.functions.sha1" title="pyspark.sql.functions.sha1"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sha1</span></code></a>(col)</p></td>
<td><p>Returns the hex string result of SHA-1.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sha2.html#pyspark.sql.functions.sha2" title="pyspark.sql.functions.sha2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sha2</span></code></a>(col, numBits)</p></td>
<td><p>Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.shiftLeft.html#pyspark.sql.functions.shiftLeft" title="pyspark.sql.functions.shiftLeft"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shiftLeft</span></code></a>(col, numBits)</p></td>
<td><p>Shift the given value numBits left.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.shiftRight.html#pyspark.sql.functions.shiftRight" title="pyspark.sql.functions.shiftRight"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shiftRight</span></code></a>(col, numBits)</p></td>
<td><p>(Signed) shift the given value numBits right.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.shiftRightUnsigned.html#pyspark.sql.functions.shiftRightUnsigned" title="pyspark.sql.functions.shiftRightUnsigned"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shiftRightUnsigned</span></code></a>(col, numBits)</p></td>
<td><p>Unsigned shift the given value numBits right.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.shuffle.html#pyspark.sql.functions.shuffle" title="pyspark.sql.functions.shuffle"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shuffle</span></code></a>(col)</p></td>
<td><p>Collection function: Generates a random permutation of the given array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.signum.html#pyspark.sql.functions.signum" title="pyspark.sql.functions.signum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">signum</span></code></a>(col)</p></td>
<td><p>Computes the signum of the given value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sin.html#pyspark.sql.functions.sin" title="pyspark.sql.functions.sin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sin</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sinh.html#pyspark.sql.functions.sinh" title="pyspark.sql.functions.sinh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sinh</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.size.html#pyspark.sql.functions.size" title="pyspark.sql.functions.size"><code class="xref py py-obj docutils literal notranslate"><span class="pre">size</span></code></a>(col)</p></td>
<td><p>Collection function: returns the length of the array or map stored in the column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.skewness.html#pyspark.sql.functions.skewness" title="pyspark.sql.functions.skewness"><code class="xref py py-obj docutils literal notranslate"><span class="pre">skewness</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the skewness of the values in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.slice.html#pyspark.sql.functions.slice" title="pyspark.sql.functions.slice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">slice</span></code></a>(x, start, length)</p></td>
<td><p>Collection function: returns an array containing all the elements in <cite>x</cite> from index <cite>start</cite> (array indices start at 1, or from the end if <cite>start</cite> is negative) with the specified <cite>length</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sort_array.html#pyspark.sql.functions.sort_array" title="pyspark.sql.functions.sort_array"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sort_array</span></code></a>(col[, asc])</p></td>
<td><p>Collection function: sorts the input array in ascending or descending order according to the natural ordering of the array elements.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.soundex.html#pyspark.sql.functions.soundex" title="pyspark.sql.functions.soundex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">soundex</span></code></a>(col)</p></td>
<td><p>Returns the SoundEx encoding for a string.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.spark_partition_id.html#pyspark.sql.functions.spark_partition_id" title="pyspark.sql.functions.spark_partition_id"><code class="xref py py-obj docutils literal notranslate"><span class="pre">spark_partition_id</span></code></a>()</p></td>
<td><p>A column for partition ID.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.split.html#pyspark.sql.functions.split" title="pyspark.sql.functions.split"><code class="xref py py-obj docutils literal notranslate"><span class="pre">split</span></code></a>(str, pattern[, limit])</p></td>
<td><p>Splits str around matches of the given pattern.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sqrt.html#pyspark.sql.functions.sqrt" title="pyspark.sql.functions.sqrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sqrt</span></code></a>(col)</p></td>
<td><p>Computes the square root of the specified float value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.stddev.html#pyspark.sql.functions.stddev" title="pyspark.sql.functions.stddev"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stddev</span></code></a>(col)</p></td>
<td><p>Aggregate function: alias for stddev_samp.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.stddev_pop.html#pyspark.sql.functions.stddev_pop" title="pyspark.sql.functions.stddev_pop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stddev_pop</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns population standard deviation of the expression in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.stddev_samp.html#pyspark.sql.functions.stddev_samp" title="pyspark.sql.functions.stddev_samp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stddev_samp</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the unbiased sample standard deviation of the expression in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.struct.html#pyspark.sql.functions.struct" title="pyspark.sql.functions.struct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">struct</span></code></a>(*cols)</p></td>
<td><p>Creates a new struct column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.substring.html#pyspark.sql.functions.substring" title="pyspark.sql.functions.substring"><code class="xref py py-obj docutils literal notranslate"><span class="pre">substring</span></code></a>(str, pos, len)</p></td>
<td><p>Substring starts at <cite>pos</cite> and is of length <cite>len</cite> when str is String type or returns the slice of byte array that starts at <cite>pos</cite> in byte and is of length <cite>len</cite> when str is Binary type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.substring_index.html#pyspark.sql.functions.substring_index" title="pyspark.sql.functions.substring_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">substring_index</span></code></a>(str, delim, count)</p></td>
<td><p>Returns the substring from string str before count occurrences of the delimiter delim.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sum.html#pyspark.sql.functions.sum" title="pyspark.sql.functions.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sum</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the sum of all values in the expression.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.sumDistinct.html#pyspark.sql.functions.sumDistinct" title="pyspark.sql.functions.sumDistinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sumDistinct</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the sum of distinct values in the expression.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.tan.html#pyspark.sql.functions.tan" title="pyspark.sql.functions.tan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tan</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.tanh.html#pyspark.sql.functions.tanh" title="pyspark.sql.functions.tanh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tanh</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 1.4.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.timestamp_seconds.html#pyspark.sql.functions.timestamp_seconds" title="pyspark.sql.functions.timestamp_seconds"><code class="xref py py-obj docutils literal notranslate"><span class="pre">timestamp_seconds</span></code></a>(col)</p></td>
<td><div class="versionadded">
<p><span class="versionmodified added">New in version 3.1.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.toDegrees.html#pyspark.sql.functions.toDegrees" title="pyspark.sql.functions.toDegrees"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toDegrees</span></code></a>(col)</p></td>
<td><div class="deprecated">
<p><span class="versionmodified deprecated">Deprecated since version 2.1.0.</span></p>
</div>
</td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.toRadians.html#pyspark.sql.functions.toRadians" title="pyspark.sql.functions.toRadians"><code class="xref py py-obj docutils literal notranslate"><span class="pre">toRadians</span></code></a>(col)</p></td>
<td><div class="deprecated">
<p><span class="versionmodified deprecated">Deprecated since version 2.1.0.</span></p>
</div>
</td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.to_csv.html#pyspark.sql.functions.to_csv" title="pyspark.sql.functions.to_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_csv</span></code></a>(col[, options])</p></td>
<td><p>Converts a column containing a <code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code> into a CSV string.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.to_date.html#pyspark.sql.functions.to_date" title="pyspark.sql.functions.to_date"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_date</span></code></a>(col[, format])</p></td>
<td><p>Converts a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> into <a class="reference internal" href="api/pyspark.sql.types.DateType.html#pyspark.sql.types.DateType" title="pyspark.sql.types.DateType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.DateType</span></code></a> using the optionally specified format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.to_json.html#pyspark.sql.functions.to_json" title="pyspark.sql.functions.to_json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_json</span></code></a>(col[, options])</p></td>
<td><p>Converts a column containing a <code class="xref py py-class docutils literal notranslate"><span class="pre">StructType</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrayType</span></code> or a <code class="xref py py-class docutils literal notranslate"><span class="pre">MapType</span></code> into a JSON string.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.to_timestamp.html#pyspark.sql.functions.to_timestamp" title="pyspark.sql.functions.to_timestamp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_timestamp</span></code></a>(col[, format])</p></td>
<td><p>Converts a <a class="reference internal" href="api/pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> into <a class="reference internal" href="api/pyspark.sql.types.TimestampType.html#pyspark.sql.types.TimestampType" title="pyspark.sql.types.TimestampType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.TimestampType</span></code></a> using the optionally specified format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.to_utc_timestamp.html#pyspark.sql.functions.to_utc_timestamp" title="pyspark.sql.functions.to_utc_timestamp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_utc_timestamp</span></code></a>(timestamp, tz)</p></td>
<td><p>This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.transform.html#pyspark.sql.functions.transform" title="pyspark.sql.functions.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transform</span></code></a>(col, f)</p></td>
<td><p>Returns an array of elements after applying a transformation to each element in the input array.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.transform_keys.html#pyspark.sql.functions.transform_keys" title="pyspark.sql.functions.transform_keys"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transform_keys</span></code></a>(col, f)</p></td>
<td><p>Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new keys for the pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.transform_values.html#pyspark.sql.functions.transform_values" title="pyspark.sql.functions.transform_values"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transform_values</span></code></a>(col, f)</p></td>
<td><p>Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new values for the pairs.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.translate.html#pyspark.sql.functions.translate" title="pyspark.sql.functions.translate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">translate</span></code></a>(srcCol, matching, replace)</p></td>
<td><p>A function that translates any character in the <cite>srcCol</cite> by a character in <cite>matching</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.trim.html#pyspark.sql.functions.trim" title="pyspark.sql.functions.trim"><code class="xref py py-obj docutils literal notranslate"><span class="pre">trim</span></code></a>(col)</p></td>
<td><p>Trim the spaces from both ends for the specified string column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.trunc.html#pyspark.sql.functions.trunc" title="pyspark.sql.functions.trunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">trunc</span></code></a>(date, format)</p></td>
<td><p>Returns date truncated to the unit specified by the format.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.udf.html#pyspark.sql.functions.udf" title="pyspark.sql.functions.udf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">udf</span></code></a>([f, returnType])</p></td>
<td><p>Creates a user defined function (UDF).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.unbase64.html#pyspark.sql.functions.unbase64" title="pyspark.sql.functions.unbase64"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unbase64</span></code></a>(col)</p></td>
<td><p>Decodes a BASE64 encoded string column and returns it as a binary column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.unhex.html#pyspark.sql.functions.unhex" title="pyspark.sql.functions.unhex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unhex</span></code></a>(col)</p></td>
<td><p>Inverse of hex.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.unix_timestamp.html#pyspark.sql.functions.unix_timestamp" title="pyspark.sql.functions.unix_timestamp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unix_timestamp</span></code></a>([timestamp, format])</p></td>
<td><p>Convert time string with given pattern (‘yyyy-MM-dd HH:mm:ss’, by default) to Unix time stamp (in seconds), using the default timezone and the default locale; returns null on failure.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.upper.html#pyspark.sql.functions.upper" title="pyspark.sql.functions.upper"><code class="xref py py-obj docutils literal notranslate"><span class="pre">upper</span></code></a>(col)</p></td>
<td><p>Converts a string expression to upper case.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.var_pop.html#pyspark.sql.functions.var_pop" title="pyspark.sql.functions.var_pop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">var_pop</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the population variance of the values in a group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.var_samp.html#pyspark.sql.functions.var_samp" title="pyspark.sql.functions.var_samp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">var_samp</span></code></a>(col)</p></td>
<td><p>Aggregate function: returns the unbiased sample variance of the values in a group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.variance.html#pyspark.sql.functions.variance" title="pyspark.sql.functions.variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">variance</span></code></a>(col)</p></td>
<td><p>Aggregate function: alias for var_samp.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.weekofyear.html#pyspark.sql.functions.weekofyear" title="pyspark.sql.functions.weekofyear"><code class="xref py py-obj docutils literal notranslate"><span class="pre">weekofyear</span></code></a>(col)</p></td>
<td><p>Extract the week number of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.when.html#pyspark.sql.functions.when" title="pyspark.sql.functions.when"><code class="xref py py-obj docutils literal notranslate"><span class="pre">when</span></code></a>(condition, value)</p></td>
<td><p>Evaluates a list of conditions and returns one of multiple possible result expressions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.window.html#pyspark.sql.functions.window" title="pyspark.sql.functions.window"><code class="xref py py-obj docutils literal notranslate"><span class="pre">window</span></code></a>(timeColumn, windowDuration[, …])</p></td>
<td><p>Bucketize rows into one or more time windows given a timestamp specifying column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.xxhash64.html#pyspark.sql.functions.xxhash64" title="pyspark.sql.functions.xxhash64"><code class="xref py py-obj docutils literal notranslate"><span class="pre">xxhash64</span></code></a>(*cols)</p></td>
<td><p>Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.year.html#pyspark.sql.functions.year" title="pyspark.sql.functions.year"><code class="xref py py-obj docutils literal notranslate"><span class="pre">year</span></code></a>(col)</p></td>
<td><p>Extract the year of a given date as integer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.functions.years.html#pyspark.sql.functions.years" title="pyspark.sql.functions.years"><code class="xref py py-obj docutils literal notranslate"><span class="pre">years</span></code></a>(col)</p></td>
<td><p>Partition transform function: A transform for timestamps and dates to partition data into years.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.functions.zip_with.html#pyspark.sql.functions.zip_with" title="pyspark.sql.functions.zip_with"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zip_with</span></code></a>(left, right, f)</p></td>
<td><p>Merge two given arrays, element-wise, into a single array using a function.</p></td>
</tr>
</tbody>
</table>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.avro.functions.from_avro.html#pyspark.sql.avro.functions.from_avro" title="pyspark.sql.avro.functions.from_avro"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_avro</span></code></a>(data, jsonFormatSchema[, options])</p></td>
<td><p>Converts a binary column of Avro format into its corresponding catalyst value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.avro.functions.to_avro.html#pyspark.sql.avro.functions.to_avro" title="pyspark.sql.avro.functions.to_avro"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_avro</span></code></a>(data[, jsonFormatSchema])</p></td>
<td><p>Converts a column into binary of avro format.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="window">
<h2>Window<a class="headerlink" href="#window" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Window.currentRow.html#pyspark.sql.Window.currentRow" title="pyspark.sql.Window.currentRow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.currentRow</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Window.orderBy.html#pyspark.sql.Window.orderBy" title="pyspark.sql.Window.orderBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.orderBy</span></code></a>(*cols)</p></td>
<td><p>Creates a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code> with the ordering defined.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Window.partitionBy.html#pyspark.sql.Window.partitionBy" title="pyspark.sql.Window.partitionBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.partitionBy</span></code></a>(*cols)</p></td>
<td><p>Creates a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code> with the partitioning defined.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Window.rangeBetween.html#pyspark.sql.Window.rangeBetween" title="pyspark.sql.Window.rangeBetween"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.rangeBetween</span></code></a>(start, end)</p></td>
<td><p>Creates a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code> with the frame boundaries defined, from <cite>start</cite> (inclusive) to <cite>end</cite> (inclusive).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Window.rowsBetween.html#pyspark.sql.Window.rowsBetween" title="pyspark.sql.Window.rowsBetween"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.rowsBetween</span></code></a>(start, end)</p></td>
<td><p>Creates a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code> with the frame boundaries defined, from <cite>start</cite> (inclusive) to <cite>end</cite> (inclusive).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.Window.unboundedFollowing.html#pyspark.sql.Window.unboundedFollowing" title="pyspark.sql.Window.unboundedFollowing"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.unboundedFollowing</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.Window.unboundedPreceding.html#pyspark.sql.Window.unboundedPreceding" title="pyspark.sql.Window.unboundedPreceding"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Window.unboundedPreceding</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.WindowSpec.orderBy.html#pyspark.sql.WindowSpec.orderBy" title="pyspark.sql.WindowSpec.orderBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">WindowSpec.orderBy</span></code></a>(*cols)</p></td>
<td><p>Defines the ordering columns in a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.WindowSpec.partitionBy.html#pyspark.sql.WindowSpec.partitionBy" title="pyspark.sql.WindowSpec.partitionBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">WindowSpec.partitionBy</span></code></a>(*cols)</p></td>
<td><p>Defines the partitioning columns in a <code class="xref py py-class docutils literal notranslate"><span class="pre">WindowSpec</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.WindowSpec.rangeBetween.html#pyspark.sql.WindowSpec.rangeBetween" title="pyspark.sql.WindowSpec.rangeBetween"><code class="xref py py-obj docutils literal notranslate"><span class="pre">WindowSpec.rangeBetween</span></code></a>(start, end)</p></td>
<td><p>Defines the frame boundaries, from <cite>start</cite> (inclusive) to <cite>end</cite> (inclusive).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.WindowSpec.rowsBetween.html#pyspark.sql.WindowSpec.rowsBetween" title="pyspark.sql.WindowSpec.rowsBetween"><code class="xref py py-obj docutils literal notranslate"><span class="pre">WindowSpec.rowsBetween</span></code></a>(start, end)</p></td>
<td><p>Defines the frame boundaries, from <cite>start</cite> (inclusive) to <cite>end</cite> (inclusive).</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="grouping">
<h2>Grouping<a class="headerlink" href="#grouping" title="Permalink to this headline">¶</a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.agg.html#pyspark.sql.GroupedData.agg" title="pyspark.sql.GroupedData.agg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.agg</span></code></a>(*exprs)</p></td>
<td><p>Computes aggregates and returns the result as a <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.apply.html#pyspark.sql.GroupedData.apply" title="pyspark.sql.GroupedData.apply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.apply</span></code></a>(udf)</p></td>
<td><p>It is an alias of <a class="reference internal" href="api/pyspark.sql.GroupedData.applyInPandas.html#pyspark.sql.GroupedData.applyInPandas" title="pyspark.sql.GroupedData.applyInPandas"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pyspark.sql.GroupedData.applyInPandas()</span></code></a>; however, it takes a <a class="reference internal" href="api/pyspark.sql.functions.pandas_udf.html#pyspark.sql.functions.pandas_udf" title="pyspark.sql.functions.pandas_udf"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pyspark.sql.functions.pandas_udf()</span></code></a> whereas <a class="reference internal" href="api/pyspark.sql.GroupedData.applyInPandas.html#pyspark.sql.GroupedData.applyInPandas" title="pyspark.sql.GroupedData.applyInPandas"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pyspark.sql.GroupedData.applyInPandas()</span></code></a> takes a Python native function.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.applyInPandas.html#pyspark.sql.GroupedData.applyInPandas" title="pyspark.sql.GroupedData.applyInPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.applyInPandas</span></code></a>(func, schema)</p></td>
<td><p>Maps each group of the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using a pandas udf and returns the result as a <cite>DataFrame</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.avg.html#pyspark.sql.GroupedData.avg" title="pyspark.sql.GroupedData.avg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.avg</span></code></a>(*cols)</p></td>
<td><p>Computes average values for each numeric column for each group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.cogroup.html#pyspark.sql.GroupedData.cogroup" title="pyspark.sql.GroupedData.cogroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.cogroup</span></code></a>(other)</p></td>
<td><p>Cogroups this group with another group so that we can run cogrouped operations.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.count.html#pyspark.sql.GroupedData.count" title="pyspark.sql.GroupedData.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.count</span></code></a>()</p></td>
<td><p>Counts the number of records for each group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.max.html#pyspark.sql.GroupedData.max" title="pyspark.sql.GroupedData.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.max</span></code></a>(*cols)</p></td>
<td><p>Computes the max value for each numeric column for each group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.mean.html#pyspark.sql.GroupedData.mean" title="pyspark.sql.GroupedData.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.mean</span></code></a>(*cols)</p></td>
<td><p>Computes average values for each numeric column for each group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.min.html#pyspark.sql.GroupedData.min" title="pyspark.sql.GroupedData.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.min</span></code></a>(*cols)</p></td>
<td><p>Computes the min value for each numeric column for each group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.pivot.html#pyspark.sql.GroupedData.pivot" title="pyspark.sql.GroupedData.pivot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.pivot</span></code></a>(pivot_col[, values])</p></td>
<td><p>Pivots a column of the current <a class="reference internal" href="api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and performs the specified aggregation.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.sql.GroupedData.sum.html#pyspark.sql.GroupedData.sum" title="pyspark.sql.GroupedData.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GroupedData.sum</span></code></a>(*cols)</p></td>
<td><p>Computes the sum for each numeric column for each group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.sql.PandasCogroupedOps.applyInPandas.html#pyspark.sql.PandasCogroupedOps.applyInPandas" title="pyspark.sql.PandasCogroupedOps.applyInPandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PandasCogroupedOps.applyInPandas</span></code></a>(func, schema)</p></td>
<td><p>Applies a function to each cogroup using pandas and returns the result as a <cite>DataFrame</cite>.</p></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="index.html" title="previous page">API Reference</a>
<a class='right-next' id="next-link" href="api/pyspark.sql.SparkSession.html" title="next page">pyspark.sql.SparkSession</a>
</div>
</main>
</div>
</div>
<script src="../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>