blob: da40c5cb6dafff69a04519f4568a060a05cd4c9b [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="Window functions defined for Column."><!-- Inform modern browsers that this page supports both dark and light color schemes,
and the page author prefers light. --><meta name="color-scheme" content="dark light"><script>
// If `prefers-color-scheme` is not supported, fall back to light mode.
// i.e. In this case, inject the `light` CSS before the others, with
// no media filter so that it will be downloaded with highest priority.
if (window.matchMedia("(prefers-color-scheme: dark)").media === "not all") {
document.documentElement.style.display = "none";
document.head.insertAdjacentHTML(
"beforeend",
"<link id=\"css\" rel=\"stylesheet\" href=\"https://bootswatch.com/5/flatly/bootstrap.css\" onload=\"document.documentElement.style.display = ''\">"
);
}
</script><title>Window functions for Column operations — column_window_functions • SparkR</title><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><link href="../deps/font-awesome-6.4.2/css/all.min.css" rel="stylesheet"><link href="../deps/font-awesome-6.4.2/css/v4-shims.min.css" rel="stylesheet"><script src="../deps/headroom-0.11.0/headroom.min.js"></script><script src="../deps/headroom-0.11.0/jQuery.headroom.min.js"></script><script src="../deps/bootstrap-toc-1.0.1/bootstrap-toc.min.js"></script><script src="../deps/clipboard.js-2.0.11/clipboard.min.js"></script><script src="../deps/search-1.0.0/autocomplete.jquery.min.js"></script><script src="../deps/search-1.0.0/fuse.min.js"></script><script src="../deps/search-1.0.0/mark.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet"><meta property="og:title" content="Window functions for Column operations — column_window_functions"><meta property="og:description" content="Window functions defined for Column."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Flatly Theme - Light --><link id="css-light" rel="stylesheet" href="https://bootswatch.com/5/flatly/bootstrap.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><!-- Darkly Theme - Dark --><link id="css-dark" rel="stylesheet" href="https://bootswatch.com/5/darkly/bootstrap.css" media="(prefers-color-scheme: dark)"><!-- preferably CSS --><link rel="stylesheet" href="../preferably.css"><link id="css-code-light" rel="stylesheet" href="../code-color-scheme-light.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><link id="css-code-dark" rel="stylesheet" href="../code-color-scheme-dark.css" media="(prefers-color-scheme: dark)"><script src="../darkswitch.js"></script></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-primary"><div class="container">
<a class="external-link navbar-brand" href="https://spark.apache.org/">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" alt="" max-height="100%"></a>
<a class="navbar-brand me-2" href="../index.html">SparkR</a>
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">4.0.0</small>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-2">
<ul class="navbar-nav me-auto"><li class="active nav-item"><a class="nav-link" href="../reference/index.html">Reference</a></li>
<li class="nav-item dropdown">
<button class="nav-link dropdown-toggle" type="button" id="dropdown-articles" data-bs-toggle="dropdown" aria-expanded="false" aria-haspopup="true">Articles</button>
<ul class="dropdown-menu" aria-labelledby="dropdown-articles"><li><a class="dropdown-item" href="../articles/sparkr-vignettes.html">SparkR - Practical Guide</a></li>
</ul></li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Toggle navigation" name="search-input" data-search-index="../search.json" id="search-input" placeholder="" autocomplete="off"></form>
<ul class="navbar-nav"><li class="nav-item"><form class="form-inline" role="search">
<input class="form-control" type="search" name="search-input" id="search-input" autocomplete="off" aria-label="Search site" placeholder="Search for" data-search-index="../search.json"></form></li>
<li>
<a class="external-link nav-link" id="css-toggle-btn" aria-label="github">
<span class="fas fa fas fa-adjust fa-lg"></span>
</a>
</li>
</ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<h1>Window functions for Column operations</h1>
<div class="d-none name"><code>column_window_functions.Rd</code></div>
</div>
<div class="ref-description section level2">
<p>Window functions defined for <code>Column</code>.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">cume_dist</span><span class="op">(</span>x <span class="op">=</span> <span class="st">"missing"</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">dense_rank</span><span class="op">(</span>x <span class="op">=</span> <span class="st">"missing"</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">lag</span><span class="op">(</span><span class="va">x</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">lead</span><span class="op">(</span><span class="va">x</span>, <span class="va">offset</span>, defaultValue <span class="op">=</span> <span class="cn">NULL</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">nth_value</span><span class="op">(</span><span class="va">x</span>, <span class="va">offset</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">ntile</span><span class="op">(</span><span class="va">x</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">percent_rank</span><span class="op">(</span>x <span class="op">=</span> <span class="st">"missing"</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">rank</span><span class="op">(</span><span class="va">x</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="fu">row_number</span><span class="op">(</span>x <span class="op">=</span> <span class="st">"missing"</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'missing'</span></span>
<span><span class="fu">cume_dist</span><span class="op">(</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'missing'</span></span>
<span><span class="fu">dense_rank</span><span class="op">(</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'characterOrColumn'</span></span>
<span><span class="fu">lag</span><span class="op">(</span><span class="va">x</span>, offset <span class="op">=</span> <span class="fl">1</span>, defaultValue <span class="op">=</span> <span class="cn">NULL</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'characterOrColumn,numeric'</span></span>
<span><span class="fu">lead</span><span class="op">(</span><span class="va">x</span>, offset <span class="op">=</span> <span class="fl">1</span>, defaultValue <span class="op">=</span> <span class="cn">NULL</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'characterOrColumn,numeric'</span></span>
<span><span class="fu">nth_value</span><span class="op">(</span><span class="va">x</span>, <span class="va">offset</span>, na.rm <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'numeric'</span></span>
<span><span class="fu">ntile</span><span class="op">(</span><span class="va">x</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'missing'</span></span>
<span><span class="fu">percent_rank</span><span class="op">(</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'missing'</span></span>
<span><span class="fu">rank</span><span class="op">(</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'ANY'</span></span>
<span><span class="fu">rank</span><span class="op">(</span><span class="va">x</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for class 'missing'</span></span>
<span><span class="fu">row_number</span><span class="op">(</span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt id="arg-x">x<a class="anchor" aria-label="anchor" href="#arg-x"></a></dt>
<dd><p>In <code>lag</code> and <code>lead</code>, it is the column as a character string or a Column
to compute on. In <code>ntile</code>, it is the number of ntile groups.</p></dd>
<dt id="arg--">...<a class="anchor" aria-label="anchor" href="#arg--"></a></dt>
<dd><p>additional argument(s).</p></dd>
<dt id="arg-offset">offset<a class="anchor" aria-label="anchor" href="#arg-offset"></a></dt>
<dd><p>a numeric indicating number of row to use as the value</p></dd>
<dt id="arg-defaultvalue">defaultValue<a class="anchor" aria-label="anchor" href="#arg-defaultvalue"></a></dt>
<dd><p>(optional) default to use when the offset row does not exist.</p></dd>
<dt id="arg-na-rm">na.rm<a class="anchor" aria-label="anchor" href="#arg-na-rm"></a></dt>
<dd><p>a logical which indicates that the Nth value should skip null in the
determination of which row to use</p></dd>
</dl></div>
<div class="section level2">
<h2 id="details">Details<a class="anchor" aria-label="anchor" href="#details"></a></h2>
<p><code>cume_dist</code>: Returns the cumulative distribution of values within a window partition,
i.e. the fraction of rows that are below the current row:
(number of values before and including x) / (total number of rows in the partition).
This is equivalent to the <code>CUME_DIST</code> function in SQL.
The method should be used with no argument.</p>
<p><code>dense_rank</code>: Returns the rank of rows within a window partition, without any gaps.
The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
sequence when there are ties. That is, if you were ranking a competition using dense_rank
and had three people tie for second place, you would say that all three were in second
place and that the next person came in third. Rank would give me sequential numbers, making
the person that came in third place (after the ties) would register as coming in fifth.
This is equivalent to the <code>DENSE_RANK</code> function in SQL.
The method should be used with no argument.</p>
<p><code>lag</code>: Returns the value that is <code>offset</code> rows before the current row, and
<code>defaultValue</code> if there is less than <code>offset</code> rows before the current row. For example,
an <code>offset</code> of one will return the previous row at any given point in the window partition.
This is equivalent to the <code>LAG</code> function in SQL.</p>
<p><code>lead</code>: Returns the value that is <code>offset</code> rows after the current row, and
<code>defaultValue</code> if there is less than <code>offset</code> rows after the current row.
For example, an <code>offset</code> of one will return the next row at any given point
in the window partition.
This is equivalent to the <code>LEAD</code> function in SQL.</p>
<p><code>nth_value</code>: Window function: returns the value that is the <code>offset</code>th
row of the window frame# (counting from 1), and <code>null</code> if the size of window
frame is less than <code>offset</code> rows.</p>
<p><code>ntile</code>: Returns the ntile group id (from 1 to n inclusive) in an ordered window
partition. For example, if n is 4, the first quarter of the rows will get value 1, the second
quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
This is equivalent to the <code>NTILE</code> function in SQL.</p>
<p><code>percent_rank</code>: Returns the relative rank (i.e. percentile) of rows within a window
partition.
This is computed by: (rank of row in its partition - 1) / (number of rows in the partition - 1).
This is equivalent to the <code>PERCENT_RANK</code> function in SQL.
The method should be used with no argument.</p>
<p><code>rank</code>: Returns the rank of rows within a window partition.
The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
sequence when there are ties. That is, if you were ranking a competition using dense_rank
and had three people tie for second place, you would say that all three were in second
place and that the next person came in third. Rank would give me sequential numbers, making
the person that came in third place (after the ties) would register as coming in fifth.
This is equivalent to the <code>RANK</code> function in SQL.
The method should be used with no argument.</p>
<p><code>row_number</code>: Returns a sequential number starting at 1 within a window partition.
This is equivalent to the <code>ROW_NUMBER</code> function in SQL.
The method should be used with no argument.</p>
</div>
<div class="section level2">
<h2 id="note">Note<a class="anchor" aria-label="anchor" href="#note"></a></h2>
<p>cume_dist since 1.6.0</p>
<p>dense_rank since 1.6.0</p>
<p>lag since 1.6.0</p>
<p>lead since 1.6.0</p>
<p>nth_value since 3.1.0</p>
<p>ntile since 1.6.0</p>
<p>percent_rank since 1.6.0</p>
<p>rank since 1.6.0</p>
<p>row_number since 1.6.0</p>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="kw">if</span> <span class="op">(</span><span class="cn">FALSE</span><span class="op">)</span> <span class="op">{</span> <span class="co"># \dontrun{</span></span></span>
<span class="r-in"><span><span class="co"># Dataframe used throughout this doc</span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="createDataFrame.html">createDataFrame</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/cbind.html" class="external-link">cbind</a></span><span class="op">(</span>model <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/colnames.html" class="external-link">rownames</a></span><span class="op">(</span><span class="va">mtcars</span><span class="op">)</span>, <span class="va">mtcars</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">ws</span> <span class="op">&lt;-</span> <span class="fu"><a href="orderBy.html">orderBy</a></span><span class="op">(</span><span class="fu"><a href="windowPartitionBy.html">windowPartitionBy</a></span><span class="op">(</span><span class="st">"am"</span><span class="op">)</span>, <span class="st">"hp"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">tmp</span> <span class="op">&lt;-</span> <span class="fu"><a href="mutate.html">mutate</a></span><span class="op">(</span><span class="va">df</span>, dist <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">cume_dist</span><span class="op">(</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>, dense_rank <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">dense_rank</span><span class="op">(</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> lag <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">lag</span><span class="op">(</span><span class="va">df</span><span class="op">$</span><span class="va">mpg</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>, lead <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">lead</span><span class="op">(</span><span class="va">df</span><span class="op">$</span><span class="va">mpg</span>, <span class="fl">1</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> percent_rank <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">percent_rank</span><span class="op">(</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> rank <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">rank</span><span class="op">(</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>, row_number <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">row_number</span><span class="op">(</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> nth_value <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">nth_value</span><span class="op">(</span><span class="va">df</span><span class="op">$</span><span class="va">mpg</span>, <span class="fl">3</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="co"># Get ntile group id (1-4) for hp</span></span></span>
<span class="r-in"><span><span class="va">tmp</span> <span class="op">&lt;-</span> <span class="fu"><a href="mutate.html">mutate</a></span><span class="op">(</span><span class="va">tmp</span>, ntile <span class="op">=</span> <span class="fu"><a href="over.html">over</a></span><span class="op">(</span><span class="fu">ntile</span><span class="op">(</span><span class="fl">4</span><span class="op">)</span>, <span class="va">ws</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="fu"><a href="head.html">head</a></span><span class="op">(</span><span class="va">tmp</span><span class="op">)</span><span class="op">}</span> <span class="co"># }</span></span></span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc" aria-label="Table of contents"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="copyright">
<p></p><p>Developed by <a href="https://www.apache.org/" class="external-link"> The Apache Software Foundation</a>.</p>
</div>
<div class="pkgdown">
<p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.1.0.</p>
<p class="preferably">Using <a href="https://preferably.amirmasoudabdol.name/?source=footer" class="external-link">preferably</a> template.</p>
</div>
</footer></div>
</body></html>