blob: 5241dc816a35a61f87666ebd5fe4b1305c0e570f [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>DataFrame &#8212; PySpark 3.4.2 documentation</title>
<!-- Theme stylesheet, icon fonts and web fonts. -->
<link rel="stylesheet" href="../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../_static/js/index.3da636dd464baa7582d2.js">
<!-- Local documentation scripts; order is preserved from the original page. -->
<script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/language_data.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<!-- The x-mathjax-config block is read when MathJax.js starts up, so it must appear before the loader script below. -->
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<!-- Canonical URL and document relations (search, next, previous). -->
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/frame.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="pyspark.pandas.DataFrame" href="api/pyspark.pandas.DataFrame.html" />
<link rel="prev" title="pyspark.pandas.Series.pandas_on_spark.transform_batch" href="api/pyspark.pandas.Series.pandas_on_spark.transform_batch.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<!-- Top navigation bar. Labelled so assistive technology can tell it apart from the sidebar navigation. -->
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main" aria-label="Site navigation">
<div class="container-xl">
<a class="navbar-brand" href="../../index.html">
<!-- The image is the only content of the link, so its alt text names the link destination. -->
<img src="../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../index.html">Overview</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../user_guide/index.html">User Guides</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../migration_guide/index.html">Migration Guides</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Left sidebar: documentation search box followed by the section navigation tree. -->
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<!-- Decorative icon; the input carries its own accessible name. -->
<i class="icon fas fa-search" aria-hidden="true"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="active">
<a href="index.html">Pandas API on Spark</a>
<ul>
<li class="">
<a href="io.html">Input/Output</a>
</li>
<li class="">
<a href="general_functions.html">General functions</a>
</li>
<li class="">
<a href="series.html">Series</a>
</li>
<li class="active">
<!-- Empty href resolves to the current document; aria-current exposes that to assistive technology. -->
<a href="" aria-current="page">DataFrame</a>
</li>
<li class="">
<a href="indexing.html">Index objects</a>
</li>
<li class="">
<a href="window.html">Window</a>
</li>
<li class="">
<a href="groupby.html">GroupBy</a>
</li>
<li class="">
<a href="resampling.html">Resampling</a>
</li>
<li class="">
<a href="ml.html">Machine Learning utilities</a>
</li>
<li class="">
<a href="extensions.html">Extensions</a>
</li>
</ul>
</li>
<li class="">
<a href="../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../pyspark.streaming.html">Spark Streaming (Legacy)</a>
</li>
<li class="">
<a href="../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../pyspark.resource.html">Resource Management</a>
</li>
<li class="">
<a href="../pyspark.errors.html">Errors</a>
</li>
</ul>
<!-- Explicitly close the bd-toc-item wrapper, which was previously left open and closed only implicitly by the nav end tag. -->
</div>
</nav>
</div>
<!-- Right-hand "On this page" table of contents; each entry targets a section id in the main content. -->
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list" aria-hidden="true"></i> On this page
</div>
<!-- The id is referenced by the body's data-target scroll-spy attribute and must not change. -->
<nav id="bd-toc-nav" aria-label="On this page">
<ul class="nav section-nav flex-column">
<li class="nav-item toc-entry toc-h2">
<a href="#constructor" class="nav-link">Constructor</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#attributes-and-underlying-data" class="nav-link">Attributes and underlying data</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#conversion" class="nav-link">Conversion</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#indexing-iteration" class="nav-link">Indexing, iteration</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#binary-operator-functions" class="nav-link">Binary operator functions</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#function-application-groupby-window" class="nav-link">Function application, GroupBy &amp; Window</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#computations-descriptive-stats" class="nav-link">Computations / Descriptive Stats</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#reindexing-selection-label-manipulation" class="nav-link">Reindexing / Selection / Label manipulation</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#missing-data-handling" class="nav-link">Missing data handling</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#reshaping-sorting-transposing" class="nav-link">Reshaping, sorting, transposing</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#combining-joining-merging" class="nav-link">Combining / joining / merging</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#time-series-related" class="nav-link">Time series-related</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#serialization-io-conversion" class="nav-link">Serialization / IO / Conversion</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#spark-related" class="nav-link">Spark-related</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#plotting" class="nav-link">Plotting</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#pandas-on-spark-specific" class="nav-link">Pandas-on-Spark specific</a>
</li>
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="dataframe">
<span id="api-dataframe"></span><h1>DataFrame<a class="headerlink" href="#dataframe" title="Permalink to this headline"></a></h1>
<div class="section" id="constructor">
<h2>Constructor<a class="headerlink" href="#constructor" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.html#pyspark.pandas.DataFrame" title="pyspark.pandas.DataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame</span></code></a>([data, index, columns, dtype, copy])</p></td>
<td><p>pandas-on-Spark DataFrame that corresponds to pandas DataFrame logically.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="attributes-and-underlying-data">
<h2>Attributes and underlying data<a class="headerlink" href="#attributes-and-underlying-data" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.index.html#pyspark.pandas.DataFrame.index" title="pyspark.pandas.DataFrame.index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.index</span></code></a></p></td>
<td><p>The index (row labels) Column of the DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.columns.html#pyspark.pandas.DataFrame.columns" title="pyspark.pandas.DataFrame.columns"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.columns</span></code></a></p></td>
<td><p>The column labels of the DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.empty.html#pyspark.pandas.DataFrame.empty" title="pyspark.pandas.DataFrame.empty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.empty</span></code></a></p></td>
<td><p>Returns true if the current DataFrame is empty.</p></td>
</tr>
</tbody>
</table>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.dtypes.html#pyspark.pandas.DataFrame.dtypes" title="pyspark.pandas.DataFrame.dtypes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dtypes</span></code></a></p></td>
<td><p>Return the dtypes in the DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.shape.html#pyspark.pandas.DataFrame.shape" title="pyspark.pandas.DataFrame.shape"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.shape</span></code></a></p></td>
<td><p>Return a tuple representing the dimensionality of the DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.axes.html#pyspark.pandas.DataFrame.axes" title="pyspark.pandas.DataFrame.axes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.axes</span></code></a></p></td>
<td><p>Return a list representing the axes of the DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.ndim.html#pyspark.pandas.DataFrame.ndim" title="pyspark.pandas.DataFrame.ndim"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.ndim</span></code></a></p></td>
<td><p>Return an int representing the number of array dimensions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.size.html#pyspark.pandas.DataFrame.size" title="pyspark.pandas.DataFrame.size"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.size</span></code></a></p></td>
<td><p>Return an int representing the number of elements in this object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.select_dtypes.html#pyspark.pandas.DataFrame.select_dtypes" title="pyspark.pandas.DataFrame.select_dtypes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.select_dtypes</span></code></a>([include, exclude])</p></td>
<td><p>Return a subset of the DataFrame’s columns based on the column dtypes.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.values.html#pyspark.pandas.DataFrame.values" title="pyspark.pandas.DataFrame.values"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.values</span></code></a></p></td>
<td><p>Return a NumPy representation of the DataFrame or the Series.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="conversion">
<h2>Conversion<a class="headerlink" href="#conversion" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.copy.html#pyspark.pandas.DataFrame.copy" title="pyspark.pandas.DataFrame.copy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.copy</span></code></a>([deep])</p></td>
<td><p>Make a copy of this object’s indices and data.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.isna.html#pyspark.pandas.DataFrame.isna" title="pyspark.pandas.DataFrame.isna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.isna</span></code></a>()</p></td>
<td><p>Detects missing values for items in the current DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.astype.html#pyspark.pandas.DataFrame.astype" title="pyspark.pandas.DataFrame.astype"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.astype</span></code></a>(dtype)</p></td>
<td><p>Cast a pandas-on-Spark object to a specified dtype <code class="docutils literal notranslate"><span class="pre">dtype</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.isnull.html#pyspark.pandas.DataFrame.isnull" title="pyspark.pandas.DataFrame.isnull"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.isnull</span></code></a>()</p></td>
<td><p>Detects missing values for items in the current DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.notna.html#pyspark.pandas.DataFrame.notna" title="pyspark.pandas.DataFrame.notna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.notna</span></code></a>()</p></td>
<td><p>Detects non-missing values for items in the current DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.notnull.html#pyspark.pandas.DataFrame.notnull" title="pyspark.pandas.DataFrame.notnull"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.notnull</span></code></a>()</p></td>
<td><p>Detects non-missing values for items in the current DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pad.html#pyspark.pandas.DataFrame.pad" title="pyspark.pandas.DataFrame.pad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pad</span></code></a>([axis, inplace, limit])</p></td>
<td><p>Synonym for <cite>DataFrame.fillna()</cite> or <cite>Series.fillna()</cite> with <code class="docutils literal notranslate"><span class="pre">method=`ffill`</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.bool.html#pyspark.pandas.DataFrame.bool" title="pyspark.pandas.DataFrame.bool"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.bool</span></code></a>()</p></td>
<td><p>Return the bool of a single element in the current object.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="indexing-iteration">
<h2>Indexing, iteration<a class="headerlink" href="#indexing-iteration" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.at.html#pyspark.pandas.DataFrame.at" title="pyspark.pandas.DataFrame.at"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.at</span></code></a></p></td>
<td><p>Access a single value for a row/column label pair.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.iat.html#pyspark.pandas.DataFrame.iat" title="pyspark.pandas.DataFrame.iat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.iat</span></code></a></p></td>
<td><p>Access a single value for a row/column pair by integer position.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.head.html#pyspark.pandas.DataFrame.head" title="pyspark.pandas.DataFrame.head"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.head</span></code></a>([n])</p></td>
<td><p>Return the first <cite>n</cite> rows.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.idxmax.html#pyspark.pandas.DataFrame.idxmax" title="pyspark.pandas.DataFrame.idxmax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.idxmax</span></code></a>([axis])</p></td>
<td><p>Return index of first occurrence of maximum over requested axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.idxmin.html#pyspark.pandas.DataFrame.idxmin" title="pyspark.pandas.DataFrame.idxmin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.idxmin</span></code></a>([axis])</p></td>
<td><p>Return index of first occurrence of minimum over requested axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.loc.html#pyspark.pandas.DataFrame.loc" title="pyspark.pandas.DataFrame.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.loc</span></code></a></p></td>
<td><p>Access a group of rows and columns by label(s) or a boolean Series.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.iloc.html#pyspark.pandas.DataFrame.iloc" title="pyspark.pandas.DataFrame.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.iloc</span></code></a></p></td>
<td><p>Purely integer-location based indexing for selection by position.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.items.html#pyspark.pandas.DataFrame.items" title="pyspark.pandas.DataFrame.items"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.items</span></code></a>()</p></td>
<td><p>Iterator over (column name, Series) pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.iteritems.html#pyspark.pandas.DataFrame.iteritems" title="pyspark.pandas.DataFrame.iteritems"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.iteritems</span></code></a>()</p></td>
<td><p>This is an alias of <code class="docutils literal notranslate"><span class="pre">items</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.iterrows.html#pyspark.pandas.DataFrame.iterrows" title="pyspark.pandas.DataFrame.iterrows"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.iterrows</span></code></a>()</p></td>
<td><p>Iterate over DataFrame rows as (index, Series) pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.itertuples.html#pyspark.pandas.DataFrame.itertuples" title="pyspark.pandas.DataFrame.itertuples"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.itertuples</span></code></a>([index, name])</p></td>
<td><p>Iterate over DataFrame rows as namedtuples.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.keys.html#pyspark.pandas.DataFrame.keys" title="pyspark.pandas.DataFrame.keys"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.keys</span></code></a>()</p></td>
<td><p>Return alias for columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pop.html#pyspark.pandas.DataFrame.pop" title="pyspark.pandas.DataFrame.pop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pop</span></code></a>(item)</p></td>
<td><p>Return item and drop from frame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.tail.html#pyspark.pandas.DataFrame.tail" title="pyspark.pandas.DataFrame.tail"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.tail</span></code></a>([n])</p></td>
<td><p>Return the last <cite>n</cite> rows.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.xs.html#pyspark.pandas.DataFrame.xs" title="pyspark.pandas.DataFrame.xs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.xs</span></code></a>(key[, axis, level])</p></td>
<td><p>Return cross-section from the DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.get.html#pyspark.pandas.DataFrame.get" title="pyspark.pandas.DataFrame.get"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.get</span></code></a>(key[, default])</p></td>
<td><p>Get item from object for given key (DataFrame column, Panel slice, etc.).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.where.html#pyspark.pandas.DataFrame.where" title="pyspark.pandas.DataFrame.where"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.where</span></code></a>(cond[, other, axis])</p></td>
<td><p>Replace values where the condition is False.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mask.html#pyspark.pandas.DataFrame.mask" title="pyspark.pandas.DataFrame.mask"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mask</span></code></a>(cond[, other])</p></td>
<td><p>Replace values where the condition is True.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.query.html#pyspark.pandas.DataFrame.query" title="pyspark.pandas.DataFrame.query"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.query</span></code></a>(expr[, inplace])</p></td>
<td><p>Query the columns of a DataFrame with a boolean expression.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="binary-operator-functions">
<h2>Binary operator functions<a class="headerlink" href="#binary-operator-functions" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.add.html#pyspark.pandas.DataFrame.add" title="pyspark.pandas.DataFrame.add"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.add</span></code></a>(other)</p></td>
<td><p>Get Addition of dataframe and other, element-wise (binary operator <cite>+</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.radd.html#pyspark.pandas.DataFrame.radd" title="pyspark.pandas.DataFrame.radd"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.radd</span></code></a>(other)</p></td>
<td><p>Get Addition of dataframe and other, element-wise (binary operator <cite>+</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.div.html#pyspark.pandas.DataFrame.div" title="pyspark.pandas.DataFrame.div"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.div</span></code></a>(other)</p></td>
<td><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>/</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rdiv.html#pyspark.pandas.DataFrame.rdiv" title="pyspark.pandas.DataFrame.rdiv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rdiv</span></code></a>(other)</p></td>
<td><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>/</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.truediv.html#pyspark.pandas.DataFrame.truediv" title="pyspark.pandas.DataFrame.truediv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.truediv</span></code></a>(other)</p></td>
<td><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>/</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rtruediv.html#pyspark.pandas.DataFrame.rtruediv" title="pyspark.pandas.DataFrame.rtruediv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rtruediv</span></code></a>(other)</p></td>
<td><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>/</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mul.html#pyspark.pandas.DataFrame.mul" title="pyspark.pandas.DataFrame.mul"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mul</span></code></a>(other)</p></td>
<td><p>Get Multiplication of dataframe and other, element-wise (binary operator <cite>*</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rmul.html#pyspark.pandas.DataFrame.rmul" title="pyspark.pandas.DataFrame.rmul"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rmul</span></code></a>(other)</p></td>
<td><p>Get Multiplication of dataframe and other, element-wise (binary operator <cite>*</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sub.html#pyspark.pandas.DataFrame.sub" title="pyspark.pandas.DataFrame.sub"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sub</span></code></a>(other)</p></td>
<td><p>Get Subtraction of dataframe and other, element-wise (binary operator <cite>-</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rsub.html#pyspark.pandas.DataFrame.rsub" title="pyspark.pandas.DataFrame.rsub"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rsub</span></code></a>(other)</p></td>
<td><p>Get Subtraction of dataframe and other, element-wise (binary operator <cite>-</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pow.html#pyspark.pandas.DataFrame.pow" title="pyspark.pandas.DataFrame.pow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pow</span></code></a>(other)</p></td>
<td><p>Get Exponential power of dataframe and other, element-wise (binary operator <cite>**</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rpow.html#pyspark.pandas.DataFrame.rpow" title="pyspark.pandas.DataFrame.rpow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rpow</span></code></a>(other)</p></td>
<td><p>Get Exponential power of dataframe and other, element-wise (binary operator <cite>**</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mod.html#pyspark.pandas.DataFrame.mod" title="pyspark.pandas.DataFrame.mod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mod</span></code></a>(other)</p></td>
<td><p>Get Modulo of dataframe and other, element-wise (binary operator <cite>%</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rmod.html#pyspark.pandas.DataFrame.rmod" title="pyspark.pandas.DataFrame.rmod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rmod</span></code></a>(other)</p></td>
<td><p>Get Modulo of dataframe and other, element-wise (binary operator <cite>%</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.floordiv.html#pyspark.pandas.DataFrame.floordiv" title="pyspark.pandas.DataFrame.floordiv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.floordiv</span></code></a>(other)</p></td>
<td><p>Get Integer division of dataframe and other, element-wise (binary operator <cite>//</cite>).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rfloordiv.html#pyspark.pandas.DataFrame.rfloordiv" title="pyspark.pandas.DataFrame.rfloordiv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rfloordiv</span></code></a>(other)</p></td>
<td><p>Get Integer division of dataframe and other, element-wise (binary operator <cite>//</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.lt.html#pyspark.pandas.DataFrame.lt" title="pyspark.pandas.DataFrame.lt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.lt</span></code></a>(other)</p></td>
<td><p>Compare if the current value is less than the other.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.gt.html#pyspark.pandas.DataFrame.gt" title="pyspark.pandas.DataFrame.gt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.gt</span></code></a>(other)</p></td>
<td><p>Compare if the current value is greater than the other.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.le.html#pyspark.pandas.DataFrame.le" title="pyspark.pandas.DataFrame.le"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.le</span></code></a>(other)</p></td>
<td><p>Compare if the current value is less than or equal to the other.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.ge.html#pyspark.pandas.DataFrame.ge" title="pyspark.pandas.DataFrame.ge"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.ge</span></code></a>(other)</p></td>
<td><p>Compare if the current value is greater than or equal to the other.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.ne.html#pyspark.pandas.DataFrame.ne" title="pyspark.pandas.DataFrame.ne"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.ne</span></code></a>(other)</p></td>
<td><p>Compare if the current value is not equal to the other.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.eq.html#pyspark.pandas.DataFrame.eq" title="pyspark.pandas.DataFrame.eq"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.eq</span></code></a>(other)</p></td>
<td><p>Compare if the current value is equal to the other.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.dot.html#pyspark.pandas.DataFrame.dot" title="pyspark.pandas.DataFrame.dot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dot</span></code></a>(other)</p></td>
<td><p>Compute the matrix multiplication between the DataFrame and <cite>other</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.combine_first.html#pyspark.pandas.DataFrame.combine_first" title="pyspark.pandas.DataFrame.combine_first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.combine_first</span></code></a>(other)</p></td>
<td><p>Update null elements with value in the same location in <cite>other</cite>.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="function-application-groupby-window">
<h2>Function application, GroupBy &amp; Window<a class="headerlink" href="#function-application-groupby-window" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.apply.html#pyspark.pandas.DataFrame.apply" title="pyspark.pandas.DataFrame.apply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.apply</span></code></a>(func[, axis, args])</p></td>
<td><p>Apply a function along an axis of the DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.applymap.html#pyspark.pandas.DataFrame.applymap" title="pyspark.pandas.DataFrame.applymap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.applymap</span></code></a>(func)</p></td>
<td><p>Apply a function to a DataFrame elementwise.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pipe.html#pyspark.pandas.DataFrame.pipe" title="pyspark.pandas.DataFrame.pipe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pipe</span></code></a>(func, *args, **kwargs)</p></td>
<td><p>Apply func(self, *args, **kwargs).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.agg.html#pyspark.pandas.DataFrame.agg" title="pyspark.pandas.DataFrame.agg"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.agg</span></code></a>(func)</p></td>
<td><p>Aggregate using one or more operations over the specified axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.aggregate.html#pyspark.pandas.DataFrame.aggregate" title="pyspark.pandas.DataFrame.aggregate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.aggregate</span></code></a>(func)</p></td>
<td><p>Aggregate using one or more operations over the specified axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.groupby.html#pyspark.pandas.DataFrame.groupby" title="pyspark.pandas.DataFrame.groupby"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.groupby</span></code></a>(by[, axis, as_index, dropna])</p></td>
<td><p>Group DataFrame or Series using one or more columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rolling.html#pyspark.pandas.DataFrame.rolling" title="pyspark.pandas.DataFrame.rolling"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rolling</span></code></a>(window[, min_periods])</p></td>
<td><p>Provide rolling transformations.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.expanding.html#pyspark.pandas.DataFrame.expanding" title="pyspark.pandas.DataFrame.expanding"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.expanding</span></code></a>([min_periods])</p></td>
<td><p>Provide expanding transformations.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.transform.html#pyspark.pandas.DataFrame.transform" title="pyspark.pandas.DataFrame.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.transform</span></code></a>(func[, axis])</p></td>
<td><p>Call <code class="docutils literal notranslate"><span class="pre">func</span></code> on self, producing a Series with transformed values that has the same length as its input.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="computations-descriptive-stats">
<span id="api-dataframe-stats"></span><h2>Computations / Descriptive Stats<a class="headerlink" href="#computations-descriptive-stats" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.abs.html#pyspark.pandas.DataFrame.abs" title="pyspark.pandas.DataFrame.abs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.abs</span></code></a>()</p></td>
<td><p>Return a Series/DataFrame with absolute numeric value of each element.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.all.html#pyspark.pandas.DataFrame.all" title="pyspark.pandas.DataFrame.all"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.all</span></code></a>([axis, bool_only, skipna])</p></td>
<td><p>Return whether all elements are True.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.any.html#pyspark.pandas.DataFrame.any" title="pyspark.pandas.DataFrame.any"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.any</span></code></a>([axis, bool_only])</p></td>
<td><p>Return whether any element is True.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.clip.html#pyspark.pandas.DataFrame.clip" title="pyspark.pandas.DataFrame.clip"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.clip</span></code></a>([lower, upper])</p></td>
<td><p>Trim values at input threshold(s).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.corr.html#pyspark.pandas.DataFrame.corr" title="pyspark.pandas.DataFrame.corr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.corr</span></code></a>([method, min_periods])</p></td>
<td><p>Compute pairwise correlation of columns, excluding NA/null values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.corrwith.html#pyspark.pandas.DataFrame.corrwith" title="pyspark.pandas.DataFrame.corrwith"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.corrwith</span></code></a>(other[, axis, drop, method])</p></td>
<td><p>Compute pairwise correlation.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.count.html#pyspark.pandas.DataFrame.count" title="pyspark.pandas.DataFrame.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.count</span></code></a>([axis, numeric_only])</p></td>
<td><p>Count non-NA cells for each column.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.cov.html#pyspark.pandas.DataFrame.cov" title="pyspark.pandas.DataFrame.cov"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cov</span></code></a>([min_periods, ddof])</p></td>
<td><p>Compute pairwise covariance of columns, excluding NA/null values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.describe.html#pyspark.pandas.DataFrame.describe" title="pyspark.pandas.DataFrame.describe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.describe</span></code></a>([percentiles])</p></td>
<td><p>Generate descriptive statistics that summarize the central tendency, dispersion and shape of a dataset’s distribution, excluding <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.ewm.html#pyspark.pandas.DataFrame.ewm" title="pyspark.pandas.DataFrame.ewm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.ewm</span></code></a>([com, span, halflife, alpha, …])</p></td>
<td><p>Provide exponentially weighted window transformations.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.kurt.html#pyspark.pandas.DataFrame.kurt" title="pyspark.pandas.DataFrame.kurt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.kurt</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return unbiased kurtosis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.kurtosis.html#pyspark.pandas.DataFrame.kurtosis" title="pyspark.pandas.DataFrame.kurtosis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.kurtosis</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return unbiased kurtosis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mad.html#pyspark.pandas.DataFrame.mad" title="pyspark.pandas.DataFrame.mad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mad</span></code></a>([axis])</p></td>
<td><p>Return the mean absolute deviation of values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.max.html#pyspark.pandas.DataFrame.max" title="pyspark.pandas.DataFrame.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.max</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return the maximum of the values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mean.html#pyspark.pandas.DataFrame.mean" title="pyspark.pandas.DataFrame.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mean</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return the mean of the values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.min.html#pyspark.pandas.DataFrame.min" title="pyspark.pandas.DataFrame.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.min</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return the minimum of the values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.median.html#pyspark.pandas.DataFrame.median" title="pyspark.pandas.DataFrame.median"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.median</span></code></a>([axis, skipna, …])</p></td>
<td><p>Return the median of the values for the requested axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.mode.html#pyspark.pandas.DataFrame.mode" title="pyspark.pandas.DataFrame.mode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.mode</span></code></a>([axis, numeric_only, dropna])</p></td>
<td><p>Get the mode(s) of each element along the selected axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pct_change.html#pyspark.pandas.DataFrame.pct_change" title="pyspark.pandas.DataFrame.pct_change"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pct_change</span></code></a>([periods])</p></td>
<td><p>Percentage change between the current and a prior element.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.prod.html#pyspark.pandas.DataFrame.prod" title="pyspark.pandas.DataFrame.prod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.prod</span></code></a>([axis, skipna, numeric_only, …])</p></td>
<td><p>Return the product of the values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.product.html#pyspark.pandas.DataFrame.product" title="pyspark.pandas.DataFrame.product"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.product</span></code></a>([axis, skipna, …])</p></td>
<td><p>Return the product of the values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.quantile.html#pyspark.pandas.DataFrame.quantile" title="pyspark.pandas.DataFrame.quantile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.quantile</span></code></a>([q, axis, numeric_only, …])</p></td>
<td><p>Return value at the given quantile.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.nunique.html#pyspark.pandas.DataFrame.nunique" title="pyspark.pandas.DataFrame.nunique"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.nunique</span></code></a>([axis, dropna, approx, rsd])</p></td>
<td><p>Return number of unique elements in the object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sem.html#pyspark.pandas.DataFrame.sem" title="pyspark.pandas.DataFrame.sem"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sem</span></code></a>([axis, skipna, ddof, numeric_only])</p></td>
<td><p>Return unbiased standard error of the mean over requested axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.skew.html#pyspark.pandas.DataFrame.skew" title="pyspark.pandas.DataFrame.skew"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.skew</span></code></a>([axis, skipna, numeric_only])</p></td>
<td><p>Return unbiased skew normalized by N-1.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sum.html#pyspark.pandas.DataFrame.sum" title="pyspark.pandas.DataFrame.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sum</span></code></a>([axis, skipna, numeric_only, …])</p></td>
<td><p>Return the sum of the values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.std.html#pyspark.pandas.DataFrame.std" title="pyspark.pandas.DataFrame.std"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.std</span></code></a>([axis, skipna, ddof, numeric_only])</p></td>
<td><p>Return sample standard deviation.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.var.html#pyspark.pandas.DataFrame.var" title="pyspark.pandas.DataFrame.var"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.var</span></code></a>([axis, ddof, numeric_only])</p></td>
<td><p>Return unbiased variance.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.cummin.html#pyspark.pandas.DataFrame.cummin" title="pyspark.pandas.DataFrame.cummin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cummin</span></code></a>([skipna])</p></td>
<td><p>Return cumulative minimum over a DataFrame or Series axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.cummax.html#pyspark.pandas.DataFrame.cummax" title="pyspark.pandas.DataFrame.cummax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cummax</span></code></a>([skipna])</p></td>
<td><p>Return cumulative maximum over a DataFrame or Series axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.cumsum.html#pyspark.pandas.DataFrame.cumsum" title="pyspark.pandas.DataFrame.cumsum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cumsum</span></code></a>([skipna])</p></td>
<td><p>Return cumulative sum over a DataFrame or Series axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.cumprod.html#pyspark.pandas.DataFrame.cumprod" title="pyspark.pandas.DataFrame.cumprod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.cumprod</span></code></a>([skipna])</p></td>
<td><p>Return cumulative product over a DataFrame or Series axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.round.html#pyspark.pandas.DataFrame.round" title="pyspark.pandas.DataFrame.round"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.round</span></code></a>([decimals])</p></td>
<td><p>Round a DataFrame to a variable number of decimal places.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.diff.html#pyspark.pandas.DataFrame.diff" title="pyspark.pandas.DataFrame.diff"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.diff</span></code></a>([periods, axis])</p></td>
<td><p>First discrete difference of element.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.eval.html#pyspark.pandas.DataFrame.eval" title="pyspark.pandas.DataFrame.eval"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.eval</span></code></a>(expr[, inplace])</p></td>
<td><p>Evaluate a string describing operations on DataFrame columns.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="reindexing-selection-label-manipulation">
<h2>Reindexing / Selection / Label manipulation<a class="headerlink" href="#reindexing-selection-label-manipulation" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.add_prefix.html#pyspark.pandas.DataFrame.add_prefix" title="pyspark.pandas.DataFrame.add_prefix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.add_prefix</span></code></a>(prefix)</p></td>
<td><p>Prefix labels with string <cite>prefix</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.add_suffix.html#pyspark.pandas.DataFrame.add_suffix" title="pyspark.pandas.DataFrame.add_suffix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.add_suffix</span></code></a>(suffix)</p></td>
<td><p>Suffix labels with string <cite>suffix</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.align.html#pyspark.pandas.DataFrame.align" title="pyspark.pandas.DataFrame.align"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.align</span></code></a>(other[, join, axis, copy])</p></td>
<td><p>Align two objects on their axes with the specified join method.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.at_time.html#pyspark.pandas.DataFrame.at_time" title="pyspark.pandas.DataFrame.at_time"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.at_time</span></code></a>(time[, asof, axis])</p></td>
<td><p>Select values at particular time of day (example: 9:30AM).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.between_time.html#pyspark.pandas.DataFrame.between_time" title="pyspark.pandas.DataFrame.between_time"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.between_time</span></code></a>(start_time, end_time)</p></td>
<td><p>Select values between particular times of the day (example: 9:00-9:30 AM).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.drop.html#pyspark.pandas.DataFrame.drop" title="pyspark.pandas.DataFrame.drop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.drop</span></code></a>([labels, axis, index, columns])</p></td>
<td><p>Drop specified labels from columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.droplevel.html#pyspark.pandas.DataFrame.droplevel" title="pyspark.pandas.DataFrame.droplevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.droplevel</span></code></a>(level[, axis])</p></td>
<td><p>Return DataFrame with requested index / column level(s) removed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.drop_duplicates.html#pyspark.pandas.DataFrame.drop_duplicates" title="pyspark.pandas.DataFrame.drop_duplicates"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.drop_duplicates</span></code></a>([subset, keep, …])</p></td>
<td><p>Return DataFrame with duplicate rows removed, optionally only considering certain columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.duplicated.html#pyspark.pandas.DataFrame.duplicated" title="pyspark.pandas.DataFrame.duplicated"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.duplicated</span></code></a>([subset, keep])</p></td>
<td><p>Return boolean Series denoting duplicate rows, optionally only considering certain columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.equals.html#pyspark.pandas.DataFrame.equals" title="pyspark.pandas.DataFrame.equals"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.equals</span></code></a>(other)</p></td>
<td><p>Compare if the current value is equal to the other.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.filter.html#pyspark.pandas.DataFrame.filter" title="pyspark.pandas.DataFrame.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.filter</span></code></a>([items, like, regex, axis])</p></td>
<td><p>Subset rows or columns of dataframe according to labels in the specified index.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.first.html#pyspark.pandas.DataFrame.first" title="pyspark.pandas.DataFrame.first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.first</span></code></a>(offset)</p></td>
<td><p>Select first periods of time series data based on a date offset.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.head.html#pyspark.pandas.DataFrame.head" title="pyspark.pandas.DataFrame.head"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.head</span></code></a>([n])</p></td>
<td><p>Return the first <cite>n</cite> rows.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.last.html#pyspark.pandas.DataFrame.last" title="pyspark.pandas.DataFrame.last"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.last</span></code></a>(offset)</p></td>
<td><p>Select final periods of time series data based on a date offset.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rename.html#pyspark.pandas.DataFrame.rename" title="pyspark.pandas.DataFrame.rename"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rename</span></code></a>([mapper, index, columns, …])</p></td>
<td><p>Alter axes labels.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rename_axis.html#pyspark.pandas.DataFrame.rename_axis" title="pyspark.pandas.DataFrame.rename_axis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rename_axis</span></code></a>([mapper, index, …])</p></td>
<td><p>Set the name of the axis for the index or columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.reset_index.html#pyspark.pandas.DataFrame.reset_index" title="pyspark.pandas.DataFrame.reset_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.reset_index</span></code></a>([level, drop, …])</p></td>
<td><p>Reset the index, or a level of it.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.set_index.html#pyspark.pandas.DataFrame.set_index" title="pyspark.pandas.DataFrame.set_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.set_index</span></code></a>(keys[, drop, append, …])</p></td>
<td><p>Set the DataFrame index (row labels) using one or more existing columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.swapaxes.html#pyspark.pandas.DataFrame.swapaxes" title="pyspark.pandas.DataFrame.swapaxes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.swapaxes</span></code></a>(i, j[, copy])</p></td>
<td><p>Interchange axes and swap values axes appropriately.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.swaplevel.html#pyspark.pandas.DataFrame.swaplevel" title="pyspark.pandas.DataFrame.swaplevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.swaplevel</span></code></a>([i, j, axis])</p></td>
<td><p>Swap levels i and j in a MultiIndex on a particular axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.take.html#pyspark.pandas.DataFrame.take" title="pyspark.pandas.DataFrame.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.take</span></code></a>(indices[, axis])</p></td>
<td><p>Return the elements in the given <em>positional</em> indices along an axis.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.isin.html#pyspark.pandas.DataFrame.isin" title="pyspark.pandas.DataFrame.isin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.isin</span></code></a>(values)</p></td>
<td><p>Whether each element in the DataFrame is contained in values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sample.html#pyspark.pandas.DataFrame.sample" title="pyspark.pandas.DataFrame.sample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sample</span></code></a>([n, frac, replace, …])</p></td>
<td><p>Return a random sample of items from an axis of object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.truncate.html#pyspark.pandas.DataFrame.truncate" title="pyspark.pandas.DataFrame.truncate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.truncate</span></code></a>([before, after, axis, copy])</p></td>
<td><p>Truncate a Series or DataFrame before and after some index value.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="missing-data-handling">
<span id="api-dataframe-missing"></span><h2>Missing data handling<a class="headerlink" href="#missing-data-handling" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.backfill.html#pyspark.pandas.DataFrame.backfill" title="pyspark.pandas.DataFrame.backfill"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.backfill</span></code></a>([axis, inplace, limit])</p></td>
<td><p>Synonym for <cite>DataFrame.fillna()</cite> or <cite>Series.fillna()</cite> with <code class="docutils literal notranslate"><span class="pre">method='bfill'</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.dropna.html#pyspark.pandas.DataFrame.dropna" title="pyspark.pandas.DataFrame.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.dropna</span></code></a>([axis, how, thresh, …])</p></td>
<td><p>Remove missing values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.fillna.html#pyspark.pandas.DataFrame.fillna" title="pyspark.pandas.DataFrame.fillna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.fillna</span></code></a>([value, method, axis, …])</p></td>
<td><p>Fill NA/NaN values.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.replace.html#pyspark.pandas.DataFrame.replace" title="pyspark.pandas.DataFrame.replace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.replace</span></code></a>([to_replace, value, …])</p></td>
<td><p>Returns a new DataFrame replacing a value with another value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.bfill.html#pyspark.pandas.DataFrame.bfill" title="pyspark.pandas.DataFrame.bfill"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.bfill</span></code></a>([axis, inplace, limit])</p></td>
<td><p>Synonym for <cite>DataFrame.fillna()</cite> or <cite>Series.fillna()</cite> with <code class="docutils literal notranslate"><span class="pre">method='bfill'</span></code>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.ffill.html#pyspark.pandas.DataFrame.ffill" title="pyspark.pandas.DataFrame.ffill"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.ffill</span></code></a>([axis, inplace, limit])</p></td>
<td><p>Synonym for <cite>DataFrame.fillna()</cite> or <cite>Series.fillna()</cite> with <code class="docutils literal notranslate"><span class="pre">method='ffill'</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.interpolate.html#pyspark.pandas.DataFrame.interpolate" title="pyspark.pandas.DataFrame.interpolate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.interpolate</span></code></a>([method, limit, …])</p></td>
<td><p>Fill NaN values using an interpolation method.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="reshaping-sorting-transposing">
<h2>Reshaping, sorting, transposing<a class="headerlink" href="#reshaping-sorting-transposing" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pivot_table.html#pyspark.pandas.DataFrame.pivot_table" title="pyspark.pandas.DataFrame.pivot_table"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pivot_table</span></code></a>([values, index, …])</p></td>
<td><p>Create a spreadsheet-style pivot table as a DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pivot.html#pyspark.pandas.DataFrame.pivot" title="pyspark.pandas.DataFrame.pivot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pivot</span></code></a>([index, columns, values])</p></td>
<td><p>Return reshaped DataFrame organized by given index / column values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sort_index.html#pyspark.pandas.DataFrame.sort_index" title="pyspark.pandas.DataFrame.sort_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sort_index</span></code></a>([axis, level, …])</p></td>
<td><p>Sort object by labels (along an axis).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.sort_values.html#pyspark.pandas.DataFrame.sort_values" title="pyspark.pandas.DataFrame.sort_values"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.sort_values</span></code></a>(by[, ascending, …])</p></td>
<td><p>Sort by the values along either axis.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.nlargest.html#pyspark.pandas.DataFrame.nlargest" title="pyspark.pandas.DataFrame.nlargest"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.nlargest</span></code></a>(n, columns[, keep])</p></td>
<td><p>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in descending order.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.nsmallest.html#pyspark.pandas.DataFrame.nsmallest" title="pyspark.pandas.DataFrame.nsmallest"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.nsmallest</span></code></a>(n, columns[, keep])</p></td>
<td><p>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in ascending order.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.stack.html#pyspark.pandas.DataFrame.stack" title="pyspark.pandas.DataFrame.stack"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.stack</span></code></a>()</p></td>
<td><p>Stack the prescribed level(s) from columns to index.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.unstack.html#pyspark.pandas.DataFrame.unstack" title="pyspark.pandas.DataFrame.unstack"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.unstack</span></code></a>()</p></td>
<td><p>Pivot the (necessarily hierarchical) index labels.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.melt.html#pyspark.pandas.DataFrame.melt" title="pyspark.pandas.DataFrame.melt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.melt</span></code></a>([id_vars, value_vars, …])</p></td>
<td><p>Unpivot a DataFrame from wide format to long format, optionally leaving identifier variables set.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.explode.html#pyspark.pandas.DataFrame.explode" title="pyspark.pandas.DataFrame.explode"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.explode</span></code></a>(column[, ignore_index])</p></td>
<td><p>Transform each element of a list-like to a row, replicating index values.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.squeeze.html#pyspark.pandas.DataFrame.squeeze" title="pyspark.pandas.DataFrame.squeeze"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.squeeze</span></code></a>([axis])</p></td>
<td><p>Squeeze 1 dimensional axis objects into scalars.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.T.html#pyspark.pandas.DataFrame.T" title="pyspark.pandas.DataFrame.T"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.T</span></code></a></p></td>
<td><p>Transpose index and columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.transpose.html#pyspark.pandas.DataFrame.transpose" title="pyspark.pandas.DataFrame.transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.transpose</span></code></a>()</p></td>
<td><p>Transpose index and columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.reindex.html#pyspark.pandas.DataFrame.reindex" title="pyspark.pandas.DataFrame.reindex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.reindex</span></code></a>([labels, index, columns, …])</p></td>
<td><p>Conform DataFrame to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.reindex_like.html#pyspark.pandas.DataFrame.reindex_like" title="pyspark.pandas.DataFrame.reindex_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.reindex_like</span></code></a>(other[, copy])</p></td>
<td><p>Return a DataFrame with matching indices as other object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.rank.html#pyspark.pandas.DataFrame.rank" title="pyspark.pandas.DataFrame.rank"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.rank</span></code></a>([method, ascending, numeric_only])</p></td>
<td><p>Compute numerical data ranks (1 through n) along axis.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="combining-joining-merging">
<h2>Combining / joining / merging<a class="headerlink" href="#combining-joining-merging" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.append.html#pyspark.pandas.DataFrame.append" title="pyspark.pandas.DataFrame.append"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.append</span></code></a>(other[, ignore_index, …])</p></td>
<td><p>Append rows of other to the end of caller, returning a new object.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.assign.html#pyspark.pandas.DataFrame.assign" title="pyspark.pandas.DataFrame.assign"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.assign</span></code></a>(**kwargs)</p></td>
<td><p>Assign new columns to a DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.merge.html#pyspark.pandas.DataFrame.merge" title="pyspark.pandas.DataFrame.merge"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.merge</span></code></a>(right[, how, on, left_on, …])</p></td>
<td><p>Merge DataFrame objects with a database-style join.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.join.html#pyspark.pandas.DataFrame.join" title="pyspark.pandas.DataFrame.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.join</span></code></a>(right[, on, how, lsuffix, …])</p></td>
<td><p>Join columns of another DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.update.html#pyspark.pandas.DataFrame.update" title="pyspark.pandas.DataFrame.update"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.update</span></code></a>(other[, join, overwrite])</p></td>
<td><p>Modify in place using non-NA values from another DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.insert.html#pyspark.pandas.DataFrame.insert" title="pyspark.pandas.DataFrame.insert"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.insert</span></code></a>(loc, column, value[, …])</p></td>
<td><p>Insert column into DataFrame at specified location.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="time-series-related">
<h2>Time series-related<a class="headerlink" href="#time-series-related" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.resample.html#pyspark.pandas.DataFrame.resample" title="pyspark.pandas.DataFrame.resample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.resample</span></code></a>(rule[, closed, label, on])</p></td>
<td><p>Resample time-series data.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.shift.html#pyspark.pandas.DataFrame.shift" title="pyspark.pandas.DataFrame.shift"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.shift</span></code></a>([periods, fill_value])</p></td>
<td><p>Shift DataFrame by desired number of periods.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.first_valid_index.html#pyspark.pandas.DataFrame.first_valid_index" title="pyspark.pandas.DataFrame.first_valid_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.first_valid_index</span></code></a>()</p></td>
<td><p>Retrieves the index of the first valid value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.last_valid_index.html#pyspark.pandas.DataFrame.last_valid_index" title="pyspark.pandas.DataFrame.last_valid_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.last_valid_index</span></code></a>()</p></td>
<td><p>Return index for last non-NA/null value.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="serialization-io-conversion">
<h2>Serialization / IO / Conversion<a class="headerlink" href="#serialization-io-conversion" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.from_records.html#pyspark.pandas.DataFrame.from_records" title="pyspark.pandas.DataFrame.from_records"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.from_records</span></code></a>(data[, index, …])</p></td>
<td><p>Convert structured or recorded ndarray to DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.info.html#pyspark.pandas.DataFrame.info" title="pyspark.pandas.DataFrame.info"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.info</span></code></a>([verbose, buf, max_cols, …])</p></td>
<td><p>Print a concise summary of a DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_table.html#pyspark.pandas.DataFrame.to_table" title="pyspark.pandas.DataFrame.to_table"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_table</span></code></a>(name[, format, mode, …])</p></td>
<td><p>Write the DataFrame into a Spark table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_delta.html#pyspark.pandas.DataFrame.to_delta" title="pyspark.pandas.DataFrame.to_delta"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_delta</span></code></a>(path[, mode, …])</p></td>
<td><p>Write the DataFrame out as a Delta Lake table.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_parquet.html#pyspark.pandas.DataFrame.to_parquet" title="pyspark.pandas.DataFrame.to_parquet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_parquet</span></code></a>(path[, mode, …])</p></td>
<td><p>Write the DataFrame out as a Parquet file or directory.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_spark_io.html#pyspark.pandas.DataFrame.to_spark_io" title="pyspark.pandas.DataFrame.to_spark_io"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_spark_io</span></code></a>([path, format, mode, …])</p></td>
<td><p>Write the DataFrame out to a Spark data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_csv.html#pyspark.pandas.DataFrame.to_csv" title="pyspark.pandas.DataFrame.to_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_csv</span></code></a>([path, sep, na_rep, …])</p></td>
<td><p>Write object to a comma-separated values (csv) file.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_pandas.html#pyspark.pandas.DataFrame.to_pandas" title="pyspark.pandas.DataFrame.to_pandas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_pandas</span></code></a>()</p></td>
<td><p>Return a pandas DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_html.html#pyspark.pandas.DataFrame.to_html" title="pyspark.pandas.DataFrame.to_html"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_html</span></code></a>([buf, columns, col_space, …])</p></td>
<td><p>Render a DataFrame as an HTML table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_numpy.html#pyspark.pandas.DataFrame.to_numpy" title="pyspark.pandas.DataFrame.to_numpy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_numpy</span></code></a>()</p></td>
<td><p>A NumPy ndarray representing the values in this DataFrame or Series.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_spark.html#pyspark.pandas.DataFrame.to_spark" title="pyspark.pandas.DataFrame.to_spark"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_spark</span></code></a>([index_col])</p></td>
<td><p>Return the current DataFrame as a Spark DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_string.html#pyspark.pandas.DataFrame.to_string" title="pyspark.pandas.DataFrame.to_string"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_string</span></code></a>([buf, columns, …])</p></td>
<td><p>Render a DataFrame to a console-friendly tabular output.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_json.html#pyspark.pandas.DataFrame.to_json" title="pyspark.pandas.DataFrame.to_json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_json</span></code></a>([path, compression, …])</p></td>
<td><p>Convert the object to a JSON string.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_dict.html#pyspark.pandas.DataFrame.to_dict" title="pyspark.pandas.DataFrame.to_dict"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_dict</span></code></a>([orient, into])</p></td>
<td><p>Convert the DataFrame to a dictionary.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_excel.html#pyspark.pandas.DataFrame.to_excel" title="pyspark.pandas.DataFrame.to_excel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_excel</span></code></a>(excel_writer[, …])</p></td>
<td><p>Write object to an Excel sheet.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_clipboard.html#pyspark.pandas.DataFrame.to_clipboard" title="pyspark.pandas.DataFrame.to_clipboard"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_clipboard</span></code></a>([excel, sep])</p></td>
<td><p>Copy object to the system clipboard.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_markdown.html#pyspark.pandas.DataFrame.to_markdown" title="pyspark.pandas.DataFrame.to_markdown"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_markdown</span></code></a>([buf, mode])</p></td>
<td><p>Print Series or DataFrame in Markdown-friendly format.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_records.html#pyspark.pandas.DataFrame.to_records" title="pyspark.pandas.DataFrame.to_records"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_records</span></code></a>([index, column_dtypes, …])</p></td>
<td><p>Convert DataFrame to a NumPy record array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.to_latex.html#pyspark.pandas.DataFrame.to_latex" title="pyspark.pandas.DataFrame.to_latex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.to_latex</span></code></a>([buf, columns, …])</p></td>
<td><p>Render an object to a LaTeX tabular environment table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.style.html#pyspark.pandas.DataFrame.style" title="pyspark.pandas.DataFrame.style"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.style</span></code></a></p></td>
<td><p>Property returning a Styler object containing methods for building a styled HTML representation for the DataFrame.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="spark-related">
<h2>Spark-related<a class="headerlink" href="#spark-related" title="Permalink to this headline"></a></h2>
<p><code class="docutils literal notranslate"><span class="pre">DataFrame.spark</span></code> provides features that do not exist in pandas but
exist in Spark. These can be accessed via <code class="docutils literal notranslate"><span class="pre">DataFrame.spark.&lt;function/property&gt;</span></code>.</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.frame.html#pyspark.pandas.DataFrame.spark.frame" title="pyspark.pandas.DataFrame.spark.frame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.frame</span></code></a>([index_col])</p></td>
<td><p>Return the current DataFrame as a Spark DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.cache.html#pyspark.pandas.DataFrame.spark.cache" title="pyspark.pandas.DataFrame.spark.cache"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.cache</span></code></a>()</p></td>
<td><p>Yields and caches the current DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.persist.html#pyspark.pandas.DataFrame.spark.persist" title="pyspark.pandas.DataFrame.spark.persist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.persist</span></code></a>([storage_level])</p></td>
<td><p>Yields and caches the current DataFrame with a specific StorageLevel.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.hint.html#pyspark.pandas.DataFrame.spark.hint" title="pyspark.pandas.DataFrame.spark.hint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.hint</span></code></a>(name, *parameters)</p></td>
<td><p>Specifies some hint on the current DataFrame.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.to_table.html#pyspark.pandas.DataFrame.spark.to_table" title="pyspark.pandas.DataFrame.spark.to_table"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.to_table</span></code></a>(name[, format, …])</p></td>
<td><p>Write the DataFrame into a Spark table.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.to_spark_io.html#pyspark.pandas.DataFrame.spark.to_spark_io" title="pyspark.pandas.DataFrame.spark.to_spark_io"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.to_spark_io</span></code></a>([path, format, …])</p></td>
<td><p>Write the DataFrame out to a Spark data source.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.apply.html#pyspark.pandas.DataFrame.spark.apply" title="pyspark.pandas.DataFrame.spark.apply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.apply</span></code></a>(func[, index_col])</p></td>
<td><p>Applies a function that takes and returns a Spark DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.repartition.html#pyspark.pandas.DataFrame.spark.repartition" title="pyspark.pandas.DataFrame.spark.repartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.repartition</span></code></a>(num_partitions)</p></td>
<td><p>Returns a new DataFrame partitioned by the given partitioning expressions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.spark.coalesce.html#pyspark.pandas.DataFrame.spark.coalesce" title="pyspark.pandas.DataFrame.spark.coalesce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.spark.coalesce</span></code></a>(num_partitions)</p></td>
<td><p>Returns a new DataFrame that has exactly <cite>num_partitions</cite> partitions.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="plotting">
<span id="api-dataframe-plot"></span><h2>Plotting<a class="headerlink" href="#plotting" title="Permalink to this headline"></a></h2>
<p><code class="docutils literal notranslate"><span class="pre">DataFrame.plot</span></code> is both a callable method and a namespace attribute for
specific plotting methods of the form <code class="docutils literal notranslate"><span class="pre">DataFrame.plot.&lt;kind&gt;</span></code>.</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.html#pyspark.pandas.DataFrame.plot" title="pyspark.pandas.DataFrame.plot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot</span></code></a></p></td>
<td><p>alias of <code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.pandas.plot.core.PandasOnSparkPlotAccessor</span></code></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.area.html#pyspark.pandas.DataFrame.plot.area" title="pyspark.pandas.DataFrame.plot.area"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.area</span></code></a>([x, y])</p></td>
<td><p>Draw a stacked area plot.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.barh.html#pyspark.pandas.DataFrame.plot.barh" title="pyspark.pandas.DataFrame.plot.barh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.barh</span></code></a>([x, y])</p></td>
<td><p>Make a horizontal bar plot.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.bar.html#pyspark.pandas.DataFrame.plot.bar" title="pyspark.pandas.DataFrame.plot.bar"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.bar</span></code></a>([x, y])</p></td>
<td><p>Vertical bar plot.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.hist.html#pyspark.pandas.DataFrame.plot.hist" title="pyspark.pandas.DataFrame.plot.hist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.hist</span></code></a>([bins])</p></td>
<td><p>Draw one histogram of the DataFrame’s columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.box.html#pyspark.pandas.DataFrame.plot.box" title="pyspark.pandas.DataFrame.plot.box"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.box</span></code></a>(**kwds)</p></td>
<td><p>Make a box plot of the Series columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.line.html#pyspark.pandas.DataFrame.plot.line" title="pyspark.pandas.DataFrame.plot.line"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.line</span></code></a>([x, y])</p></td>
<td><p>Plot DataFrame/Series as lines.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.pie.html#pyspark.pandas.DataFrame.plot.pie" title="pyspark.pandas.DataFrame.plot.pie"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.pie</span></code></a>(**kwds)</p></td>
<td><p>Generate a pie plot.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.scatter.html#pyspark.pandas.DataFrame.plot.scatter" title="pyspark.pandas.DataFrame.plot.scatter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.scatter</span></code></a>(x, y, **kwds)</p></td>
<td><p>Create a scatter plot with varying marker point size and color.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.plot.density.html#pyspark.pandas.DataFrame.plot.density" title="pyspark.pandas.DataFrame.plot.density"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.plot.density</span></code></a>([bw_method, ind])</p></td>
<td><p>Generate Kernel Density Estimate plot using Gaussian kernels.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.hist.html#pyspark.pandas.DataFrame.hist" title="pyspark.pandas.DataFrame.hist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.hist</span></code></a>([bins])</p></td>
<td><p>Draw one histogram of the DataFrame’s columns.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.boxplot.html#pyspark.pandas.DataFrame.boxplot" title="pyspark.pandas.DataFrame.boxplot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.boxplot</span></code></a>(**kwds)</p></td>
<td><p>Make a box plot of the Series columns.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.kde.html#pyspark.pandas.DataFrame.kde" title="pyspark.pandas.DataFrame.kde"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.kde</span></code></a>([bw_method, ind])</p></td>
<td><p>Generate Kernel Density Estimate plot using Gaussian kernels.</p></td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="pandas-on-spark-specific">
<h2>Pandas-on-Spark specific<a class="headerlink" href="#pandas-on-spark-specific" title="Permalink to this headline"></a></h2>
<p><code class="docutils literal notranslate"><span class="pre">DataFrame.pandas_on_spark</span></code> provides pandas-on-Spark specific features that exist only in the pandas API on Spark.
These can be accessed via <code class="docutils literal notranslate"><span class="pre">DataFrame.pandas_on_spark.&lt;function/property&gt;</span></code>.</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pandas_on_spark.apply_batch.html#pyspark.pandas.DataFrame.pandas_on_spark.apply_batch" title="pyspark.pandas.DataFrame.pandas_on_spark.apply_batch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pandas_on_spark.apply_batch</span></code></a>(func)</p></td>
<td><p>Apply a function that takes pandas DataFrame and outputs pandas DataFrame.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.pandas.DataFrame.pandas_on_spark.transform_batch.html#pyspark.pandas.DataFrame.pandas_on_spark.transform_batch" title="pyspark.pandas.DataFrame.pandas_on_spark.transform_batch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.pandas_on_spark.transform_batch</span></code></a>(…)</p></td>
<td><p>Transform chunks with a function that takes pandas DataFrame and outputs pandas DataFrame.</p></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="api/pyspark.pandas.Series.pandas_on_spark.transform_batch.html" title="previous page">pyspark.pandas.Series.pandas_on_spark.transform_batch</a>
<a class='right-next' id="next-link" href="api/pyspark.pandas.DataFrame.html" title="next page">pyspark.pandas.DataFrame</a>
</div>
</main>
</div>
</div>
<script src="../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>