| <!DOCTYPE html><html><head><title>R: gapplyCollect</title> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.css"> |
| <script type="text/javascript"> |
| const macros = { "\\R": "\\textsf{R}", "\\code": "\\texttt"}; |
| function processMathHTML() { |
| var l = document.getElementsByClassName('reqn'); |
| for (let e of l) { katex.render(e.textContent, e, { throwOnError: false, macros }); } |
| return; |
| }</script> |
| <script defer src="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.js" |
| onload="processMathHTML();"></script> |
| <link rel="stylesheet" type="text/css" href="R.css" /> |
| |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> |
| <script>hljs.initHighlightingOnLoad();</script> |
| </head><body><div class="container"> |
| |
| <table style="width: 100%;"><tr><td>gapplyCollect {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> |
| |
| <h2>gapplyCollect</h2> |
| |
| <h3>Description</h3> |
| |
| <p>Groups the SparkDataFrame using the specified columns, applies the R function to each |
| group and collects the result back to R as a data.frame. |
| </p> |
| |
| |
| <h3>Usage</h3> |
| |
| <pre><code class='language-R'>gapplyCollect(x, ...) |
| |
| ## S4 method for signature 'GroupedData' |
| gapplyCollect(x, func) |
| |
| ## S4 method for signature 'SparkDataFrame' |
| gapplyCollect(x, cols, func) |
| </code></pre> |
| |
| |
| <h3>Arguments</h3> |
| |
| <table> |
| <tr style="vertical-align: top;"><td><code>x</code></td> |
| <td> |
| <p>a SparkDataFrame or GroupedData.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>...</code></td> |
| <td> |
| <p>additional argument(s) passed to the method.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>func</code></td> |
| <td> |
| <p>a function to be applied to each group partition specified by the grouping |
| columns of the SparkDataFrame. See Details.</p> |
| </td></tr> |
| <tr style="vertical-align: top;"><td><code>cols</code></td> |
| <td> |
| <p>grouping columns.</p> |
| </td></tr> |
| </table> |
| |
| |
| <h3>Details</h3> |
| |
| <p><code>func</code> is a function of two arguments. The first, usually named <code>key</code> |
| (though this is not enforced), corresponds to the grouping key and will be an |
| unnamed <code>list</code> of <code>length(cols)</code> length-one objects corresponding |
| to the grouping columns' values for the current group. |
| </p> |
| <p>The second, herein <code>x</code>, will be a local <code><a href="../../base/html/data.frame.html">data.frame</a></code> with the |
| columns of the input not in <code>cols</code> for the rows corresponding to <code>key</code>. |
| </p> |
| <p>The output of <code>func</code> must be a <code>data.frame</code> matching <code>schema</code> – |
| in particular this means the names of the output <code>data.frame</code> are irrelevant. |
| </p> |
| |
| |
| <h3>Value</h3> |
| |
| <p>A data.frame. |
| </p> |
| |
| |
| <h3>Note</h3> |
| |
| <p>gapplyCollect(GroupedData) since 2.0.0 |
| </p> |
| <p>gapplyCollect(SparkDataFrame) since 2.0.0 |
| </p> |
| |
| |
| <h3>See Also</h3> |
| |
| <p><a href="../../SparkR/help/gapply.html">gapply</a> |
| </p> |
| <p>Other SparkDataFrame functions: |
| <code><a href="../../SparkR/help/SparkDataFrame-class.html">SparkDataFrame-class</a></code>, |
| <code><a href="../../SparkR/help/agg.html">agg</a>()</code>, |
| <code><a href="../../SparkR/help/alias.html">alias</a>()</code>, |
| <code><a href="../../SparkR/help/arrange.html">arrange</a>()</code>, |
| <code><a href="../../SparkR/help/as.data.frame.html">as.data.frame</a>()</code>, |
| <code><a href="../../SparkR/help/attach+2CSparkDataFrame-method.html">attach,SparkDataFrame-method</a></code>, |
| <code><a href="../../SparkR/help/broadcast.html">broadcast</a>()</code>, |
| <code><a href="../../SparkR/help/cache.html">cache</a>()</code>, |
| <code><a href="../../SparkR/help/checkpoint.html">checkpoint</a>()</code>, |
| <code><a href="../../SparkR/help/coalesce.html">coalesce</a>()</code>, |
| <code><a href="../../SparkR/help/collect.html">collect</a>()</code>, |
| <code><a href="../../SparkR/help/colnames.html">colnames</a>()</code>, |
| <code><a href="../../SparkR/help/coltypes.html">coltypes</a>()</code>, |
| <code><a href="../../SparkR/help/createOrReplaceTempView.html">createOrReplaceTempView</a>()</code>, |
| <code><a href="../../SparkR/help/crossJoin.html">crossJoin</a>()</code>, |
| <code><a href="../../SparkR/help/cube.html">cube</a>()</code>, |
| <code><a href="../../SparkR/help/dapplyCollect.html">dapplyCollect</a>()</code>, |
| <code><a href="../../SparkR/help/dapply.html">dapply</a>()</code>, |
| <code><a href="../../SparkR/help/describe.html">describe</a>()</code>, |
| <code><a href="../../SparkR/help/dim.html">dim</a>()</code>, |
| <code><a href="../../SparkR/help/distinct.html">distinct</a>()</code>, |
| <code><a href="../../SparkR/help/dropDuplicates.html">dropDuplicates</a>()</code>, |
| <code><a href="../../SparkR/help/dropna.html">dropna</a>()</code>, |
| <code><a href="../../SparkR/help/drop.html">drop</a>()</code>, |
| <code><a href="../../SparkR/help/dtypes.html">dtypes</a>()</code>, |
| <code><a href="../../SparkR/help/exceptAll.html">exceptAll</a>()</code>, |
| <code><a href="../../SparkR/help/except.html">except</a>()</code>, |
| <code><a href="../../SparkR/help/explain.html">explain</a>()</code>, |
| <code><a href="../../SparkR/help/filter.html">filter</a>()</code>, |
| <code><a href="../../SparkR/help/first.html">first</a>()</code>, |
| <code><a href="../../SparkR/help/gapply.html">gapply</a>()</code>, |
| <code><a href="../../SparkR/help/getNumPartitions.html">getNumPartitions</a>()</code>, |
| <code><a href="../../SparkR/help/group_by.html">group_by</a>()</code>, |
| <code><a href="../../SparkR/help/head.html">head</a>()</code>, |
| <code><a href="../../SparkR/help/hint.html">hint</a>()</code>, |
| <code><a href="../../SparkR/help/histogram.html">histogram</a>()</code>, |
| <code><a href="../../SparkR/help/insertInto.html">insertInto</a>()</code>, |
| <code><a href="../../SparkR/help/intersectAll.html">intersectAll</a>()</code>, |
| <code><a href="../../SparkR/help/intersect.html">intersect</a>()</code>, |
| <code><a href="../../SparkR/help/isLocal.html">isLocal</a>()</code>, |
| <code><a href="../../SparkR/help/isStreaming.html">isStreaming</a>()</code>, |
| <code><a href="../../SparkR/help/join.html">join</a>()</code>, |
| <code><a href="../../SparkR/help/limit.html">limit</a>()</code>, |
| <code><a href="../../SparkR/help/localCheckpoint.html">localCheckpoint</a>()</code>, |
| <code><a href="../../SparkR/help/merge.html">merge</a>()</code>, |
| <code><a href="../../SparkR/help/mutate.html">mutate</a>()</code>, |
| <code><a href="../../SparkR/help/ncol.html">ncol</a>()</code>, |
| <code><a href="../../SparkR/help/nrow.html">nrow</a>()</code>, |
| <code><a href="../../SparkR/help/persist.html">persist</a>()</code>, |
| <code><a href="../../SparkR/help/printSchema.html">printSchema</a>()</code>, |
| <code><a href="../../SparkR/help/randomSplit.html">randomSplit</a>()</code>, |
| <code><a href="../../SparkR/help/rbind.html">rbind</a>()</code>, |
| <code><a href="../../SparkR/help/rename.html">rename</a>()</code>, |
| <code><a href="../../SparkR/help/repartitionByRange.html">repartitionByRange</a>()</code>, |
| <code><a href="../../SparkR/help/repartition.html">repartition</a>()</code>, |
| <code><a href="../../SparkR/help/rollup.html">rollup</a>()</code>, |
| <code><a href="../../SparkR/help/sample.html">sample</a>()</code>, |
| <code><a href="../../SparkR/help/saveAsTable.html">saveAsTable</a>()</code>, |
| <code><a href="../../SparkR/help/schema.html">schema</a>()</code>, |
| <code><a href="../../SparkR/help/selectExpr.html">selectExpr</a>()</code>, |
| <code><a href="../../SparkR/help/select.html">select</a>()</code>, |
| <code><a href="../../SparkR/help/showDF.html">showDF</a>()</code>, |
| <code><a href="../../SparkR/help/show.html">show</a>()</code>, |
| <code><a href="../../SparkR/help/storageLevel.html">storageLevel</a>()</code>, |
| <code><a href="../../SparkR/help/str.html">str</a>()</code>, |
| <code><a href="../../SparkR/help/subset.html">subset</a>()</code>, |
| <code><a href="../../SparkR/help/summary.html">summary</a>()</code>, |
| <code><a href="../../SparkR/help/take.html">take</a>()</code>, |
| <code><a href="../../SparkR/help/toJSON.html">toJSON</a>()</code>, |
| <code><a href="../../SparkR/help/unionAll.html">unionAll</a>()</code>, |
| <code><a href="../../SparkR/help/unionByName.html">unionByName</a>()</code>, |
| <code><a href="../../SparkR/help/union.html">union</a>()</code>, |
| <code><a href="../../SparkR/help/unpersist.html">unpersist</a>()</code>, |
| <code><a href="../../SparkR/help/withColumn.html">withColumn</a>()</code>, |
| <code><a href="../../SparkR/help/withWatermark.html">withWatermark</a>()</code>, |
| <code><a href="../../SparkR/help/with.html">with</a>()</code>, |
| <code><a href="../../SparkR/help/write.df.html">write.df</a>()</code>, |
| <code><a href="../../SparkR/help/write.jdbc.html">write.jdbc</a>()</code>, |
| <code><a href="../../SparkR/help/write.json.html">write.json</a>()</code>, |
| <code><a href="../../SparkR/help/write.orc.html">write.orc</a>()</code>, |
| <code><a href="../../SparkR/help/write.parquet.html">write.parquet</a>()</code>, |
| <code><a href="../../SparkR/help/write.stream.html">write.stream</a>()</code>, |
| <code><a href="../../SparkR/help/write.text.html">write.text</a>()</code> |
| </p> |
| |
| |
| <h3>Examples</h3> |
| |
| <pre><code class="r">## Not run: |
| ##D # Computes the arithmetic mean of the second column by grouping |
| ##D # on the first and third columns. Output the grouping values and the average. |
| ##D |
| ##D df <- createDataFrame ( |
| ##D list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)), |
| ##D c("a", "b", "c", "d")) |
| ##D |
| ##D result <- gapplyCollect( |
| ##D df, |
| ##D c("a", "c"), |
| ##D function(key, x) { |
| ##D y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE) |
| ##D colnames(y) <- c("key_a", "key_c", "mean_b") |
| ##D y |
| ##D }) |
| ##D |
| ##D # We can also group the data and afterwards call gapplyCollect on GroupedData. |
| ##D # For example: |
| ##D gdf <- group_by(df, "a", "c") |
| ##D result <- gapplyCollect( |
| ##D gdf, |
| ##D function(key, x) { |
| ##D y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE) |
| ##D colnames(y) <- c("key_a", "key_c", "mean_b") |
| ##D y |
| ##D }) |
| ##D |
| ##D # Result |
| ##D # ------ |
| ##D # key_a key_c mean_b |
| ##D # 3 3 3.0 |
| ##D # 1 1 1.5 |
| ##D |
| ##D # Fits linear models on iris dataset by grouping on the 'Species' column and |
| ##D # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length' |
| ##D # and 'Petal_Width' as training features. |
| ##D |
| ##D df <- createDataFrame (iris) |
| ##D result <- gapplyCollect( |
| ##D df, |
| ##D df$"Species", |
| ##D function(key, x) { |
| ##D m <- suppressWarnings(lm(Sepal_Length ~ |
| ##D Sepal_Width + Petal_Length + Petal_Width, x)) |
| ##D data.frame(t(coef(m))) |
| ##D }) |
| ##D |
| ##D # Result |
| ##D # --------- |
| ##D # Model X.Intercept. Sepal_Width Petal_Length Petal_Width |
| ##D # 1 0.699883 0.3303370 0.9455356 -0.1697527 |
| ##D # 2 1.895540 0.3868576 0.9083370 -0.6792238 |
| ##D # 3 2.351890 0.6548350 0.2375602 0.2521257 |
| ##D |
| ## End(Not run) |
| </code></pre> |
| |
| |
| <hr /><div style="text-align: center;">[Package <em>SparkR</em> version 3.2.2 <a href="00Index.html">Index</a>]</div> |
| </div> |
| </body></html> |