| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Collection functions for Column operations</title> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| <link rel="stylesheet" type="text/css" href="R.css" /> |
| |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> |
| <script>hljs.initHighlightingOnLoad();</script> |
| </head><body> |
| |
| <table width="100%" summary="page for column_collection_functions {SparkR}"><tr><td>column_collection_functions {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> |
| |
| <h2>Collection functions for Column operations</h2> |
| |
| <h3>Description</h3> |
| |
| <p>Collection functions defined for <code>Column</code>. |
| </p> |
| |
| |
| <h3>Usage</h3> |
| |
| <pre> |
| array_contains(x, value) |
| |
| array_distinct(x) |
| |
| array_except(x, y) |
| |
| array_intersect(x, y) |
| |
| array_join(x, delimiter, ...) |
| |
| array_max(x) |
| |
| array_min(x) |
| |
| array_position(x, value) |
| |
| array_remove(x, value) |
| |
| array_repeat(x, count) |
| |
| array_sort(x) |
| |
| arrays_overlap(x, y) |
| |
| array_union(x, y) |
| |
| arrays_zip(x, ...) |
| |
| concat(x, ...) |
| |
| element_at(x, extraction) |
| |
| explode(x) |
| |
| explode_outer(x) |
| |
| flatten(x) |
| |
| from_json(x, schema, ...) |
| |
| map_from_arrays(x, y) |
| |
| map_keys(x) |
| |
| map_values(x) |
| |
| posexplode(x) |
| |
| posexplode_outer(x) |
| |
| reverse(x) |
| |
| shuffle(x) |
| |
| size(x) |
| |
| slice(x, start, length) |
| |
| sort_array(x, asc = TRUE) |
| |
| to_json(x, ...) |
| |
| ## S4 method for signature 'Column' |
| reverse(x) |
| |
| ## S4 method for signature 'Column' |
| to_json(x, ...) |
| |
| ## S4 method for signature 'Column' |
| concat(x, ...) |
| |
| ## S4 method for signature 'Column,characterOrstructType' |
| from_json(x, schema, |
| as.json.array = FALSE, ...) |
| |
| ## S4 method for signature 'Column' |
| array_contains(x, value) |
| |
| ## S4 method for signature 'Column' |
| array_distinct(x) |
| |
| ## S4 method for signature 'Column,Column' |
| array_except(x, y) |
| |
| ## S4 method for signature 'Column,Column' |
| array_intersect(x, y) |
| |
| ## S4 method for signature 'Column,character' |
| array_join(x, delimiter, |
| nullReplacement = NULL) |
| |
| ## S4 method for signature 'Column' |
| array_max(x) |
| |
| ## S4 method for signature 'Column' |
| array_min(x) |
| |
| ## S4 method for signature 'Column' |
| array_position(x, value) |
| |
| ## S4 method for signature 'Column' |
| array_remove(x, value) |
| |
| ## S4 method for signature 'Column,numericOrColumn' |
| array_repeat(x, count) |
| |
| ## S4 method for signature 'Column' |
| array_sort(x) |
| |
| ## S4 method for signature 'Column,Column' |
| arrays_overlap(x, y) |
| |
| ## S4 method for signature 'Column,Column' |
| array_union(x, y) |
| |
| ## S4 method for signature 'Column' |
| arrays_zip(x, ...) |
| |
| ## S4 method for signature 'Column' |
| shuffle(x) |
| |
| ## S4 method for signature 'Column' |
| flatten(x) |
| |
| ## S4 method for signature 'Column,Column' |
| map_from_arrays(x, y) |
| |
| ## S4 method for signature 'Column' |
| map_keys(x) |
| |
| ## S4 method for signature 'Column' |
| map_values(x) |
| |
| ## S4 method for signature 'Column' |
| element_at(x, extraction) |
| |
| ## S4 method for signature 'Column' |
| explode(x) |
| |
| ## S4 method for signature 'Column' |
| size(x) |
| |
| ## S4 method for signature 'Column' |
| slice(x, start, length) |
| |
| ## S4 method for signature 'Column' |
| sort_array(x, asc = TRUE) |
| |
| ## S4 method for signature 'Column' |
| posexplode(x) |
| |
| ## S4 method for signature 'Column' |
| explode_outer(x) |
| |
| ## S4 method for signature 'Column' |
| posexplode_outer(x) |
| </pre> |
| |
| |
| <h3>Arguments</h3> |
| |
| <table summary="R argblock"> |
| <tr valign="top"><td><code>x</code></td> |
| <td> |
| <p>Column to compute on. Note the difference in the following methods: |
| </p> |
| |
| <ul> |
| <li> <p><code>to_json</code>: it is the column containing the struct, array of the structs, |
| the map or array of maps. |
| </p> |
| </li> |
| <li> <p><code>from_json</code>: it is the column containing the JSON string. |
| </p> |
| </li></ul> |
| </td></tr> |
| <tr valign="top"><td><code>value</code></td> |
| <td> |
| <p>A value to compute on. |
| </p> |
| |
| <ul> |
| <li> <p><code>array_contains</code>: a value to be checked if contained in the column. |
| </p> |
| </li> |
| <li> <p><code>array_position</code>: a value to locate in the given array. |
| </p> |
| </li> |
| <li> <p><code>array_remove</code>: a value to remove in the given array. |
| </p> |
| </li></ul> |
| </td></tr> |
| <tr valign="top"><td><code>y</code></td> |
| <td> |
| <p>Column to compute on.</p> |
| </td></tr> |
| <tr valign="top"><td><code>delimiter</code></td> |
| <td> |
| <p>a character string that is used to concatenate the elements of column.</p> |
| </td></tr> |
| <tr valign="top"><td><code>...</code></td> |
| <td> |
| <p>additional argument(s). In <code>to_json</code> and <code>from_json</code>, this contains |
| additional named properties to control how it is converted, accepts the same |
| options as the JSON data source. In <code>arrays_zip</code>, this contains additional |
| Columns of arrays to be merged.</p> |
| </td></tr> |
| <tr valign="top"><td><code>count</code></td> |
| <td> |
| <p>a Column or constant determining the number of repetitions.</p> |
| </td></tr> |
| <tr valign="top"><td><code>extraction</code></td> |
| <td> |
| <p>index to check for in array or key to check for in map</p> |
| </td></tr> |
| <tr valign="top"><td><code>schema</code></td> |
| <td> |
| <p>a structType object to use as the schema to use when parsing the JSON string. |
| Since Spark 2.3, the DDL-formatted string is also supported for the schema.</p> |
| </td></tr> |
| <tr valign="top"><td><code>start</code></td> |
| <td> |
| <p>an index indicating the first element occurring in the result.</p> |
| </td></tr> |
| <tr valign="top"><td><code>length</code></td> |
| <td> |
| <p>a number of consecutive elements chosen to the result.</p> |
| </td></tr> |
| <tr valign="top"><td><code>asc</code></td> |
| <td> |
| <p>a logical flag indicating the sorting order. |
| TRUE, sorting is in ascending order. |
| FALSE, sorting is in descending order.</p> |
| </td></tr> |
| <tr valign="top"><td><code>as.json.array</code></td> |
| <td> |
| <p>indicating if input string is JSON array of objects or a single object.</p> |
| </td></tr> |
| <tr valign="top"><td><code>nullReplacement</code></td> |
| <td> |
| <p>an optional character string that is used to replace the Null values.</p> |
| </td></tr> |
| </table> |
| |
| |
| <h3>Details</h3> |
| |
| <p><code>reverse</code>: Returns a reversed string or an array with reverse order of elements. |
| </p> |
| <p><code>to_json</code>: Converts a column containing a <code>structType</code>, a <code>mapType</code> |
| or an <code>arrayType</code> into a Column of JSON string. |
| Resolving the Column can fail if an unsupported type is encountered. |
| </p> |
| <p><code>concat</code>: Concatenates multiple input columns together into a single column. |
| The function works with strings, binary and compatible array columns. |
| </p> |
| <p><code>from_json</code>: Parses a column containing a JSON string into a Column of <code>structType</code> |
| with the specified <code>schema</code> or array of <code>structType</code> if <code>as.json.array</code> is set |
| to <code>TRUE</code>. If the string is unparseable, the Column will contain the value NA. |
| </p> |
| <p><code>array_contains</code>: Returns null if the array is null, true if the array contains |
| the value, and false otherwise. |
| </p> |
| <p><code>array_distinct</code>: Removes duplicate values from the array. |
| </p> |
| <p><code>array_except</code>: Returns an array of the elements in the first array but not in the second |
| array, without duplicates. The order of elements in the result is not determined. |
| </p> |
| <p><code>array_intersect</code>: Returns an array of the elements in the intersection of the given two |
| arrays, without duplicates. |
| </p> |
| <p><code>array_join</code>: Concatenates the elements of column using the delimiter. |
| Null values are replaced with nullReplacement if set, otherwise they are ignored. |
| </p> |
| <p><code>array_max</code>: Returns the maximum value of the array. |
| </p> |
| <p><code>array_min</code>: Returns the minimum value of the array. |
| </p> |
| <p><code>array_position</code>: Locates the position of the first occurrence of the given value |
| in the given array. Returns NA if either of the arguments are NA. |
| Note: The position is not zero based, but 1 based index. Returns 0 if the given |
| value could not be found in the array. |
| </p> |
| <p><code>array_remove</code>: Removes all elements that equal to element from the given array. |
| </p> |
| <p><code>array_repeat</code>: Creates an array containing <code>x</code> repeated the number of times |
| given by <code>count</code>. |
| </p> |
| <p><code>array_sort</code>: Sorts the input array in ascending order. The elements of the input array |
| must be orderable. NA elements will be placed at the end of the returned array. |
| </p> |
| <p><code>arrays_overlap</code>: Returns true if the input arrays have at least one non-null element in |
| common. If not and both arrays are non-empty and any of them contains a null, it returns null. |
| It returns false otherwise. |
| </p> |
| <p><code>array_union</code>: Returns an array of the elements in the union of the given two arrays, |
| without duplicates. |
| </p> |
| <p><code>arrays_zip</code>: Returns a merged array of structs in which the N-th struct contains all N-th |
| values of input arrays. |
| </p> |
| <p><code>shuffle</code>: Returns a random permutation of the given array. |
| </p> |
| <p><code>flatten</code>: Creates a single array from an array of arrays. |
| If a structure of nested arrays is deeper than two levels, only one level of nesting is removed. |
| </p> |
| <p><code>map_from_arrays</code>: Creates a new map column. The array in the first column is used for |
| keys. The array in the second column is used for values. All elements in the array for key |
| should not be null. |
| </p> |
| <p><code>map_keys</code>: Returns an unordered array containing the keys of the map. |
| </p> |
| <p><code>map_values</code>: Returns an unordered array containing the values of the map. |
| </p> |
| <p><code>element_at</code>: Returns element of array at given index in <code>extraction</code> if |
| <code>x</code> is array. Returns value for the given key in <code>extraction</code> if <code>x</code> is map. |
| Note: The position is not zero based, but 1 based index. |
| </p> |
| <p><code>explode</code>: Creates a new row for each element in the given array or map column. |
| </p> |
| <p><code>size</code>: Returns length of array or map. |
| </p> |
| <p><code>slice</code>: Returns an array containing all the elements in x from the index start |
| (or starting from the end if start is negative) with the specified length. |
| </p> |
| <p><code>sort_array</code>: Sorts the input array in ascending or descending order according to |
| the natural ordering of the array elements. NA elements will be placed at the beginning of |
| the returned array in ascending order or at the end of the returned array in descending order. |
| </p> |
| <p><code>posexplode</code>: Creates a new row for each element with position in the given array |
| or map column. |
| </p> |
| <p><code>explode</code>: Creates a new row for each element in the given array or map column. |
| Unlike <code>explode</code>, if the array/map is <code>null</code> or empty |
| then <code>null</code> is produced. |
| </p> |
| <p><code>posexplode_outer</code>: Creates a new row for each element with position in the given |
| array or map column. Unlike <code>posexplode</code>, if the array/map is <code>null</code> or empty |
| then the row (<code>null</code>, <code>null</code>) is produced. |
| </p> |
| |
| |
| <h3>Note</h3> |
| |
| <p>reverse since 1.5.0 |
| </p> |
| <p>to_json since 2.2.0 |
| </p> |
| <p>concat since 1.5.0 |
| </p> |
| <p>from_json since 2.2.0 |
| </p> |
| <p>array_contains since 1.6.0 |
| </p> |
| <p>array_distinct since 2.4.0 |
| </p> |
| <p>array_except since 2.4.0 |
| </p> |
| <p>array_intersect since 2.4.0 |
| </p> |
| <p>array_join since 2.4.0 |
| </p> |
| <p>array_max since 2.4.0 |
| </p> |
| <p>array_min since 2.4.0 |
| </p> |
| <p>array_position since 2.4.0 |
| </p> |
| <p>array_remove since 2.4.0 |
| </p> |
| <p>array_repeat since 2.4.0 |
| </p> |
| <p>array_sort since 2.4.0 |
| </p> |
| <p>arrays_overlap since 2.4.0 |
| </p> |
| <p>array_union since 2.4.0 |
| </p> |
| <p>arrays_zip since 2.4.0 |
| </p> |
| <p>shuffle since 2.4.0 |
| </p> |
| <p>flatten since 2.4.0 |
| </p> |
| <p>map_from_arrays since 2.4.0 |
| </p> |
| <p>map_keys since 2.3.0 |
| </p> |
| <p>map_values since 2.3.0 |
| </p> |
| <p>element_at since 2.4.0 |
| </p> |
| <p>explode since 1.5.0 |
| </p> |
| <p>size since 1.5.0 |
| </p> |
| <p>slice since 2.4.0 |
| </p> |
| <p>sort_array since 1.6.0 |
| </p> |
| <p>posexplode since 2.1.0 |
| </p> |
| <p>explode_outer since 2.3.0 |
| </p> |
| <p>posexplode_outer since 2.3.0 |
| </p> |
| |
| |
| <h3>Examples</h3> |
| |
| <pre><code class="r">## Not run: |
| ##D # Dataframe used throughout this doc |
| ##D df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) |
| ##D tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp)) |
| ##D head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1), shuffle(tmp$v1))) |
| ##D head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1))) |
| ##D head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1))) |
| ##D head(select(tmp, flatten(tmp$v1), reverse(tmp$v1), array_remove(tmp$v1, 21))) |
| ##D tmp2 <- mutate(tmp, v2 = explode(tmp$v1)) |
| ##D head(tmp2) |
| ##D head(select(tmp, posexplode(tmp$v1))) |
| ##D head(select(tmp, slice(tmp$v1, 2L, 2L))) |
| ##D head(select(tmp, sort_array(tmp$v1))) |
| ##D head(select(tmp, sort_array(tmp$v1, asc = FALSE))) |
| ##D tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl)) |
| ##D head(select(tmp3, map_keys(tmp3$v3), map_values(tmp3$v3))) |
| ##D head(select(tmp3, element_at(tmp3$v3, "Valiant"))) |
| ##D tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp)) |
| ##D head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5))) |
| ##D head(select(tmp4, array_except(tmp4$v4, tmp4$v5), array_intersect(tmp4$v4, tmp4$v5))) |
| ##D head(select(tmp4, array_union(tmp4$v4, tmp4$v5))) |
| ##D head(select(tmp4, arrays_zip(tmp4$v4, tmp4$v5), map_from_arrays(tmp4$v4, tmp4$v5))) |
| ##D head(select(tmp, concat(df$mpg, df$cyl, df$hp))) |
| ##D tmp5 <- mutate(df, v6 = create_array(df$model, df$model)) |
| ##D head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL"))) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D # Converts a struct into a JSON object |
| ##D df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d") |
| ##D select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy')) |
| ##D |
| ##D # Converts an array of structs into a JSON array |
| ##D df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") |
| ##D df2 <- mutate(df2, people_json = to_json(df2$people)) |
| ##D |
| ##D # Converts a map into a JSON object |
| ##D df2 <- sql("SELECT map('name', 'Bob')) as people") |
| ##D df2 <- mutate(df2, people_json = to_json(df2$people)) |
| ##D |
| ##D # Converts an array of maps into a JSON array |
| ##D df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people") |
| ##D df2 <- mutate(df2, people_json = to_json(df2$people)) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d") |
| ##D df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy')) |
| ##D schema <- structType(structField("date", "string")) |
| ##D head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy'))) |
| ##D df2 <- sql("SELECT named_struct('name', 'Bob') as people") |
| ##D df2 <- mutate(df2, people_json = to_json(df2$people)) |
| ##D schema <- structType(structField("name", "string")) |
| ##D head(select(df2, from_json(df2$people_json, schema))) |
| ##D head(select(df2, from_json(df2$people_json, "name STRING"))) |
| ## End(Not run) |
| |
| ## Not run: |
| ##D df2 <- createDataFrame(data.frame( |
| ##D id = c(1, 2, 3), text = c("a,b,c", NA, "d,e") |
| ##D )) |
| ##D |
| ##D head(select(df2, df2$id, explode_outer(split_string(df2$text, ",")))) |
| ##D head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ",")))) |
| ## End(Not run) |
| </code></pre> |
| |
| |
| <hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.4.0 <a href="00Index.html">Index</a>]</div> |
| </body></html> |