blob: 2bea12571614f6285289856a5c076e9e20369afe [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>pyspark.sql.functions.session_window &#8212; PySpark 3.4.3 documentation</title>
<link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.session_window.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="pyspark.sql.functions.timestamp_seconds" href="pyspark.sql.functions.timestamp_seconds.html" />
<link rel="prev" title="pyspark.sql.functions.window" href="pyspark.sql.functions.window.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../../index.html">Overview</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../user_guide/index.html">User Guides</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../migration_guide/index.html">Migration Guides</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="active">
<a href="../index.html">Spark SQL</a>
<ul>
<li class="">
<a href="../core_classes.html">Core Classes</a>
</li>
<li class="">
<a href="../spark_session.html">Spark Session</a>
</li>
<li class="">
<a href="../configuration.html">Configuration</a>
</li>
<li class="">
<a href="../io.html">Input/Output</a>
</li>
<li class="">
<a href="../dataframe.html">DataFrame</a>
</li>
<li class="">
<a href="../column.html">Column</a>
</li>
<li class="">
<a href="../data_types.html">Data Types</a>
</li>
<li class="">
<a href="../row.html">Row</a>
</li>
<li class="active">
<a href="../functions.html">Functions</a>
</li>
<li class="">
<a href="../window.html">Window</a>
</li>
<li class="">
<a href="../grouping.html">Grouping</a>
</li>
<li class="">
<a href="../catalog.html">Catalog</a>
</li>
<li class="">
<a href="../avro.html">Avro</a>
</li>
<li class="">
<a href="../observation.html">Observation</a>
</li>
<li class="">
<a href="../udf.html">UDF</a>
</li>
<li class="">
<a href="../protobuf.html">Protobuf</a>
</li>
</ul>
</li>
<li class="">
<a href="../../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="../../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../../pyspark.streaming.html">Spark Streaming (Legacy)</a>
</li>
<li class="">
<a href="../../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../../pyspark.resource.html">Resource Management</a>
</li>
<li class="">
<a href="../../pyspark.errors.html">Errors</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="pyspark-sql-functions-session-window">
<h1>pyspark.sql.functions.session_window<a class="headerlink" href="#pyspark-sql-functions-session-window" title="Permalink to this headline"></a></h1>
<dl class="py function">
<dt id="pyspark.sql.functions.session_window">
<code class="sig-prename descclassname">pyspark.sql.functions.</code><code class="sig-name descname">session_window</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">timeColumn</span><span class="p">:</span> <span class="n">ColumnOrName</span></em>, <em class="sig-param"><span class="n">gapDuration</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>pyspark.sql.column.Column<span class="p">, </span>str<span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; pyspark.sql.column.Column<a class="reference internal" href="../../../_modules/pyspark/sql/functions.html#session_window"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.functions.session_window" title="Permalink to this definition"></a></dt>
<dd><p>Generates a session window given a column that specifies the timestamp.
A session window is a dynamic window, which means the length of the window varies
according to the given inputs. The length of a session window is defined as “the timestamp
of the latest input of the session + gap duration”, so when new inputs are bound to the
current session window, the end time of the session window can be extended according to the new
inputs.
Windows can support microsecond precision. Windows in the order of months are not supported.
For a streaming query, you may use the function <cite>current_timestamp</cite> to generate windows on
processing time.
gapDuration is provided as a string, e.g. ‘1 second’, ‘1 day 12 hours’, ‘2 minutes’. Valid
interval units are ‘week’, ‘day’, ‘hour’, ‘minute’, ‘second’, ‘millisecond’, ‘microsecond’.
It can also be a Column that is evaluated to a gap duration dynamically based on the
input row.
The output column will be a struct called ‘session_window’ by default with the nested columns
‘start’ and ‘end’, where ‘start’ and ‘end’ will be of <a class="reference internal" href="pyspark.sql.types.TimestampType.html#pyspark.sql.types.TimestampType" title="pyspark.sql.types.TimestampType"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.types.TimestampType</span></code></a>.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 3.2.0.</span></p>
</div>
<div class="versionchanged">
<p><span class="versionmodified changed">Changed in version 3.4.0: </span>Supports Spark Connect.</p>
</div>
<dl class="field-list">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><dl>
<dt><strong>timeColumn</strong><span class="classifier"><a class="reference internal" href="pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> or str</span></dt><dd><p>The column name or column to use as the timestamp for windowing by time.
The time column must be of TimestampType or TimestampNTZType.</p>
</dd>
<dt><strong>gapDuration</strong><span class="classifier"><a class="reference internal" href="pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a> or str</span></dt><dd><p>A Python string literal or column specifying the timeout of the session. It could be
a static value, e.g. <cite>10 minutes</cite>, <cite>1 second</cite>, or an expression/UDF that specifies the gap
duration dynamically based on the input row.</p>
</dd>
</dl>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><dl class="simple">
<dt><a class="reference internal" href="pyspark.sql.Column.html#pyspark.sql.Column" title="pyspark.sql.Column"><code class="xref py py-class docutils literal notranslate"><span class="pre">Column</span></code></a></dt><dd><p>the column for computed results.</p>
</dd>
</dl>
</dd>
</dl>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">([(</span><span class="s2">&quot;2016-03-11 09:00:07&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)])</span><span class="o">.</span><span class="n">toDF</span><span class="p">(</span><span class="s2">&quot;date&quot;</span><span class="p">,</span> <span class="s2">&quot;val&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">w</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">session_window</span><span class="p">(</span><span class="s2">&quot;date&quot;</span><span class="p">,</span> <span class="s2">&quot;5 seconds&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="s2">&quot;val&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;sum&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">w</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">w</span><span class="o">.</span><span class="n">session_window</span><span class="o">.</span><span class="n">start</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;string&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;start&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">w</span><span class="o">.</span><span class="n">session_window</span><span class="o">.</span><span class="n">end</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;string&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;end&quot;</span><span class="p">),</span> <span class="s2">&quot;sum&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(start=&#39;2016-03-11 09:00:07&#39;, end=&#39;2016-03-11 09:00:12&#39;, sum=1)]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">w</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="n">session_window</span><span class="p">(</span><span class="s2">&quot;date&quot;</span><span class="p">,</span> <span class="n">lit</span><span class="p">(</span><span class="s2">&quot;5 seconds&quot;</span><span class="p">)))</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="s2">&quot;val&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;sum&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">w</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">w</span><span class="o">.</span><span class="n">session_window</span><span class="o">.</span><span class="n">start</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;string&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;start&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">w</span><span class="o">.</span><span class="n">session_window</span><span class="o">.</span><span class="n">end</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;string&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;end&quot;</span><span class="p">),</span> <span class="s2">&quot;sum&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span>
<span class="go">[Row(start=&#39;2016-03-11 09:00:07&#39;, end=&#39;2016-03-11 09:00:12&#39;, sum=1)]</span>
</pre></div>
</div>
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.sql.functions.window.html" title="previous page">pyspark.sql.functions.window</a>
<a class='right-next' id="next-link" href="pyspark.sql.functions.timestamp_seconds.html" title="next page">pyspark.sql.functions.timestamp_seconds</a>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>