| <!DOCTYPE html><html lang="en"><head><title>R: Non-aggregate functions for Column operations</title> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.css"> |
| <script> |
| // KaTeX macro table: \R is typeset as sans-serif "R", \code expands to \texttt. |
| const macros = { "\\R": "\\textsf{R}", "\\code": "\\texttt" }; |
| |
| // Renders each element of class "reqn" in place, feeding its text content to |
| // KaTeX as the TeX source. throwOnError is disabled and the shared macro table |
| // is passed on every call. |
| function processMathHTML() { |
|   const targets = document.getElementsByClassName('reqn'); |
|   for (let i = 0; i < targets.length; i += 1) { |
|     const node = targets[i]; |
|     katex.render(node.textContent, node, { throwOnError: false, macros: macros }); |
|   } |
| } |
| </script> |
| <script defer src="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.js" |
| onload="processMathHTML();"></script> |
| <link rel="stylesheet" type="text/css" href="R.css" /> |
| |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> |
| <script>hljs.initHighlightingOnLoad();</script> |
| </head><body><div class="container"> |
| |
| <table style="width: 100%;"><tr><td>column_nonaggregate_functions {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> |
| |
| <h2>Non-aggregate functions for Column operations</h2> |
| |
| <h3>Description</h3> |
| |
| <p>Non-aggregate functions defined for <code>Column</code>. |
| </p> |
| |
| |
| <h3>Usage</h3> |
| |
| <pre><code class='language-R'>when(condition, value) |
| |
| bitwise_not(x) |
| |
| bitwiseNOT(x) |
| |
| create_array(x, ...) |
| |
| create_map(x, ...) |
| |
| expr(x) |
| |
| greatest(x, ...) |
| |
| input_file_name(x = "missing") |
| |
| isnan(x) |
| |
| least(x, ...) |
| |
| lit(x) |
| |
| monotonically_increasing_id(x = "missing") |
| |
| nanvl(y, x) |
| |
| negate(x) |
| |
| rand(seed) |
| |
| randn(seed) |
| |
| spark_partition_id(x = "missing") |
| |
| struct(x, ...) |
| |
| ## S4 method for signature 'ANY' |
| lit(x) |
| |
| ## S4 method for signature 'Column' |
| bitwise_not(x) |
| |
| ## S4 method for signature 'Column' |
| bitwiseNOT(x) |
| |
| ## S4 method for signature 'Column' |
| coalesce(x, ...) |
| |
| ## S4 method for signature 'Column' |
| isnan(x) |
| |
| ## S4 method for signature 'Column' |
| is.nan(x) |
| |
| ## S4 method for signature 'missing' |
| monotonically_increasing_id() |
| |
| ## S4 method for signature 'Column' |
| negate(x) |
| |
| ## S4 method for signature 'missing' |
| spark_partition_id() |
| |
| ## S4 method for signature 'characterOrColumn' |
| struct(x, ...) |
| |
| ## S4 method for signature 'Column' |
| nanvl(y, x) |
| |
| ## S4 method for signature 'Column' |
| greatest(x, ...) |
| |
| ## S4 method for signature 'Column' |
| least(x, ...) |
| |
| ## S4 method for signature 'character' |
| expr(x) |
| |
| ## S4 method for signature 'missing' |
| rand(seed) |
| |
| ## S4 method for signature 'numeric' |
| rand(seed) |
| |
| ## S4 method for signature 'missing' |
| randn(seed) |
| |
| ## S4 method for signature 'numeric' |
| randn(seed) |
| |
| ## S4 method for signature 'Column' |
| when(condition, value) |
| |
| ## S4 method for signature 'Column' |
| ifelse(test, yes, no) |
| |
| ## S4 method for signature 'Column' |
| create_array(x, ...) |
| |
| ## S4 method for signature 'Column' |
| create_map(x, ...) |
| |
| ## S4 method for signature 'missing' |
| input_file_name() |
| </code></pre> |
| |
| |
| <h3>Arguments</h3> |
| |
| <table> |
| <tr style="vertical-align: top;"><td><code>condition</code></td> |
| <td> |
| <p>the condition to test on. Must be a Column expression.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>value</code></td> |
| <td> |
| <p>result expression.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>x</code></td> |
| <td> |
| <p>Column to compute on. In <code>lit</code>, it is a literal value or a Column. |
| In <code>expr</code>, it is a character string containing the expression to be parsed.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>...</code></td> |
| <td> |
| <p>additional Columns.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>y</code></td> |
| <td> |
| <p>Column to compute on.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>seed</code></td> |
| <td> |
| <p>a random seed. Can be missing.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>test</code></td> |
| <td> |
| <p>a Column expression that describes the condition.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>yes</code></td> |
| <td> |
| <p>return values for <code>TRUE</code> elements of test.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>no</code></td> |
| <td> |
| <p>return values for <code>FALSE</code> elements of test.</p> |
| </td></tr> |
| </table> |
| |
| |
| <h3>Details</h3> |
| |
| <p><code>lit</code>: A new Column is created to represent the literal value. |
| If the parameter is a Column, it is returned unchanged. |
| </p> |
| <p><code>bitwise_not</code>: Computes bitwise NOT. |
| </p> |
| <p><code>bitwiseNOT</code>: Computes bitwise NOT. |
| </p> |
| <p><code>coalesce</code>: Returns the first column that is not NA, or NA if all inputs are. |
| </p> |
| <p><code>isnan</code>: Returns true if the column is NaN. |
| </p> |
| <p><code>is.nan</code>: Alias for <a href="../../SparkR/help/isnan.html">isnan</a>. |
| </p> |
| <p><code>monotonically_increasing_id</code>: Returns a column that generates monotonically increasing |
| 64-bit integers. The generated ID is guaranteed to be monotonically increasing and unique, |
| but not consecutive. The current implementation puts the partition ID in the upper 31 bits, |
| and the record number within each partition in the lower 33 bits. The assumption is that the |
| SparkDataFrame has fewer than 1 billion partitions, and each partition has fewer than 8 billion |
| records. As an example, consider a SparkDataFrame with two partitions, each with 3 records. |
| This expression would return the following IDs: |
| 0, 1, 2, 8589934592 (1L &lt;&lt; 33), 8589934593, 8589934594. |
| This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL. |
| The method should be used with no argument. |
| Note: the function is non-deterministic because its result depends on partition IDs. |
| </p> |
| <p><code>negate</code>: Unary minus, i.e. negate the expression. |
| </p> |
| <p><code>spark_partition_id</code>: Returns the partition ID as a SparkDataFrame column. |
| Note that this is non-deterministic because it depends on data partitioning and |
| task scheduling. |
| This is equivalent to the <code>SPARK_PARTITION_ID</code> function in SQL. |
| </p> |
| <p><code>struct</code>: Creates a new struct column that composes multiple input columns. |
| </p> |
| <p><code>nanvl</code>: Returns the first column (<code>y</code>) if it is not NaN, or the second column |
| (<code>x</code>) if the first column is NaN. Both inputs should be floating point columns |
| (DoubleType or FloatType). |
| </p> |
| <p><code>greatest</code>: Returns the greatest value of the list of column names, skipping null values. |
| This function takes at least 2 parameters. It will return null if all parameters are null. |
| </p> |
| <p><code>least</code>: Returns the least value of the list of column names, skipping null values. |
| This function takes at least 2 parameters. It will return null if all parameters are null. |
| </p> |
| <p><code>expr</code>: Parses the expression string into the column that it represents, similar to |
| <code>SparkDataFrame.selectExpr</code>. |
| </p> |
| <p><code>rand</code>: Generates a random column with independent and identically distributed (i.i.d.) |
| samples uniformly distributed in [0.0, 1.0). |
| Note: the function is non-deterministic in the general case. |
| </p> |
| <p><code>randn</code>: Generates a column with independent and identically distributed (i.i.d.) samples |
| from the standard normal distribution. |
| Note: the function is non-deterministic in the general case. |
| </p> |
| <p><code>when</code>: Evaluates a list of conditions and returns one of multiple possible result |
| expressions. For unmatched expressions null is returned. |
| </p> |
| <p><code>ifelse</code>: Evaluates a list of conditions and returns <code>yes</code> if the conditions are |
| satisfied. Otherwise <code>no</code> is returned for unmatched conditions. |
| </p> |
| <p><code>create_array</code>: Creates a new array column. The input columns must all have the same data |
| type. |
| </p> |
| <p><code>create_map</code>: Creates a new map column. The input columns must be grouped as key-value |
| pairs, e.g. (key1, value1, key2, value2, ...). |
| The key columns must all have the same data type, and can't be null. |
| The value columns must all have the same data type. |
| </p> |
| <p><code>input_file_name</code>: Creates a string column with the input file name for a given row. |
| The method should be used with no argument. |
| </p> |
| |
| |
| <h3>Note</h3> |
| |
| <p>lit since 1.5.0 |
| </p> |
| <p>bitwise_not since 3.2.0 |
| </p> |
| <p>bitwiseNOT since 1.5.0 |
| </p> |
| <p>coalesce(Column) since 2.1.1 |
| </p> |
| <p>isnan since 2.0.0 |
| </p> |
| <p>is.nan since 2.0.0 |
| </p> |
| <p>negate since 1.5.0 |
| </p> |
| <p>spark_partition_id since 2.0.0 |
| </p> |
| <p>struct since 1.6.0 |
| </p> |
| <p>nanvl since 1.5.0 |
| </p> |
| <p>greatest since 1.5.0 |
| </p> |
| <p>least since 1.5.0 |
| </p> |
| <p>expr since 1.5.0 |
| </p> |
| <p>rand since 1.5.0 |
| </p> |
| <p>rand(numeric) since 1.5.0 |
| </p> |
| <p>randn since 1.5.0 |
| </p> |
| <p>randn(numeric) since 1.5.0 |
| </p> |
| <p>when since 1.5.0 |
| </p> |
| <p>ifelse since 1.5.0 |
| </p> |
| <p>create_array since 2.3.0 |
| </p> |
| <p>create_map since 2.3.0 |
| </p> |
| <p>input_file_name since 2.3.0 |
| </p> |
| |
| |
| <h3>See Also</h3> |
| |
| <p>coalesce,SparkDataFrame-method |
| </p> |
| <p>Other non-aggregate functions: |
| <code><a href="../../SparkR/help/column.html">column</a>()</code>, |
| <code><a href="../../SparkR/help/not.html">not</a>()</code> |
| </p> |
| |
| |
| <h3>Examples</h3> |
| |
| <pre><code class="r">## Not run: |
| ##D # Dataframe used throughout this doc |
| ##D df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D tmp <- mutate(df, v1 = lit(df$mpg), v2 = lit("x"), v3 = lit("2015-01-01"), |
| ##D v4 = negate(df$mpg), v5 = expr('length(model)'), |
| ##D v6 = greatest(df$vs, df$am), v7 = least(df$vs, df$am), |
| ##D v8 = column("mpg")) |
| ##D head(tmp) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D head(select(df, bitwise_not(cast(df$vs, "int")))) |
| ## End(Not run) |
| |
| ## Not run: head(select(df, monotonically_increasing_id())) |
| |
| ## Not run: head(select(df, spark_partition_id())) |
| |
| ## Not run: |
| ##D tmp <- mutate(df, v1 = struct(df$mpg, df$cyl), v2 = struct("hp", "wt", "vs"), |
| ##D v3 = create_array(df$mpg, df$cyl, df$hp), |
| ##D v4 = create_map(lit("x"), lit(1.0), lit("y"), lit(-1.0))) |
| ##D head(tmp) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D tmp <- mutate(df, r1 = rand(), r2 = rand(10), r3 = randn(), r4 = randn(10)) |
| ##D head(tmp) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D tmp <- mutate(df, mpg_na = otherwise(when(df$mpg > 20, df$mpg), lit(NaN)), |
| ##D mpg2 = ifelse(df$mpg > 20 & df$am > 0, 0, 1), |
| ##D mpg3 = ifelse(df$mpg > 20, df$mpg, 20.0)) |
| ##D head(tmp) |
| ##D tmp <- mutate(tmp, ind_na1 = is.nan(tmp$mpg_na), ind_na2 = isnan(tmp$mpg_na)) |
| ##D head(select(tmp, coalesce(tmp$mpg_na, tmp$mpg))) |
| ##D head(select(tmp, nanvl(tmp$mpg_na, tmp$hp))) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D tmp <- read.text("README.md") |
| ##D head(select(tmp, input_file_name())) |
| ## End(Not run) |
| </code></pre> |
| |
| |
| <hr /><div style="text-align: center;">[Package <em>SparkR</em> version 3.2.2 <a href="00Index.html">Index</a>]</div> |
| </div> |
| </body></html> |