blob: 731a75c36813a4cfe022a9aaeccf53aea7082e58 [file] [log] [blame]
<!DOCTYPE html><html><head><title>R: Merges two data frames</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.css">
<script type="text/javascript">
// Custom KaTeX macros made available to every formula rendered on this page.
const macros = { "\\R": "\\textsf{R}", "\\code": "\\texttt" };

/**
 * Typesets each element carrying the "reqn" class with KaTeX, using the
 * element's own text as the TeX source and the element itself as the
 * render target. Called from the onload handler of the KaTeX script tag.
 */
function processMathHTML() {
  const equations = document.getElementsByClassName('reqn');
  for (let i = 0; i < equations.length; i++) {
    const node = equations[i];
    // throwOnError: false asks KaTeX to display invalid input instead of throwing.
    katex.render(node.textContent, node, { throwOnError: false, macros: macros });
  }
}</script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.15.3/dist/katex.min.js"
onload="processMathHTML();"></script>
<link rel="stylesheet" type="text/css" href="R.css" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>
</head><body><div class="container">
<table style="width: 100%;"><tr><td>merge {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table>
<h2>Merges two data frames</h2>
<h3>Description</h3>
<p>Merges two data frames
</p>
<h3>Usage</h3>
<pre><code class='language-R'>merge(x, y, ...)
## S4 method for signature 'SparkDataFrame,SparkDataFrame'
merge(
x,
y,
by = intersect(names(x), names(y)),
by.x = by,
by.y = by,
all = FALSE,
all.x = all,
all.y = all,
sort = TRUE,
suffixes = c("_x", "_y"),
...
)
</code></pre>
<h3>Arguments</h3>
<table>
<tr style="vertical-align: top;"><td><code>x</code></td>
<td>
<p>the first data frame to be joined.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>y</code></td>
<td>
<p>the second data frame to be joined.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>...</code></td>
<td>
<p>additional argument(s) passed to the method.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>by</code></td>
<td>
<p>a character vector specifying the join columns. If by is not
specified, the common column names in <code>x</code> and <code>y</code> will be used.
If by or both by.x and by.y are explicitly set to NULL or of length 0, the Cartesian
Product of x and y will be returned.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>by.x</code></td>
<td>
<p>a character vector specifying the joining columns for x.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>by.y</code></td>
<td>
<p>a character vector specifying the joining columns for y.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>all</code></td>
<td>
<p>a boolean value setting <code>all.x</code> and <code>all.y</code>
if any of them are unset.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>all.x</code></td>
<td>
<p>a boolean value indicating whether all the rows in x should
be included in the join.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>all.y</code></td>
<td>
<p>a boolean value indicating whether all the rows in y should
be included in the join.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>sort</code></td>
<td>
<p>a logical argument indicating whether the resulting columns should be sorted.</p>
</td></tr>
<tr style="vertical-align: top;"><td><code>suffixes</code></td>
<td>
<p>a string vector of length 2 used to make colnames of
<code>x</code> and <code>y</code> unique.
The first element is appended to each colname of <code>x</code>.
The second element is appended to each colname of <code>y</code>.</p>
</td></tr>
</table>
<h3>Details</h3>
<p>If all.x and all.y are set to FALSE, a natural join will be returned. If
all.x is set to TRUE and all.y is set to FALSE, a left outer join will
be returned. If all.x is set to FALSE and all.y is set to TRUE, a right
outer join will be returned. If all.x and all.y are set to TRUE, a full
outer join will be returned.
</p>
<h3>Note</h3>
<p>merge since 1.5.0
</p>
<h3>See Also</h3>
<p><a href="../../SparkR/help/join.html">join</a> <a href="../../SparkR/help/crossJoin.html">crossJoin</a>
</p>
<p>Other SparkDataFrame functions:
<code><a href="../../SparkR/help/SparkDataFrame-class.html">SparkDataFrame-class</a></code>,
<code><a href="../../SparkR/help/agg.html">agg</a>()</code>,
<code><a href="../../SparkR/help/alias.html">alias</a>()</code>,
<code><a href="../../SparkR/help/arrange.html">arrange</a>()</code>,
<code><a href="../../SparkR/help/as.data.frame.html">as.data.frame</a>()</code>,
<code><a href="../../SparkR/help/attach+2CSparkDataFrame-method.html">attach,SparkDataFrame-method</a></code>,
<code><a href="../../SparkR/help/broadcast.html">broadcast</a>()</code>,
<code><a href="../../SparkR/help/cache.html">cache</a>()</code>,
<code><a href="../../SparkR/help/checkpoint.html">checkpoint</a>()</code>,
<code><a href="../../SparkR/help/coalesce.html">coalesce</a>()</code>,
<code><a href="../../SparkR/help/collect.html">collect</a>()</code>,
<code><a href="../../SparkR/help/colnames.html">colnames</a>()</code>,
<code><a href="../../SparkR/help/coltypes.html">coltypes</a>()</code>,
<code><a href="../../SparkR/help/createOrReplaceTempView.html">createOrReplaceTempView</a>()</code>,
<code><a href="../../SparkR/help/crossJoin.html">crossJoin</a>()</code>,
<code><a href="../../SparkR/help/cube.html">cube</a>()</code>,
<code><a href="../../SparkR/help/dapplyCollect.html">dapplyCollect</a>()</code>,
<code><a href="../../SparkR/help/dapply.html">dapply</a>()</code>,
<code><a href="../../SparkR/help/describe.html">describe</a>()</code>,
<code><a href="../../SparkR/help/dim.html">dim</a>()</code>,
<code><a href="../../SparkR/help/distinct.html">distinct</a>()</code>,
<code><a href="../../SparkR/help/dropDuplicates.html">dropDuplicates</a>()</code>,
<code><a href="../../SparkR/help/dropna.html">dropna</a>()</code>,
<code><a href="../../SparkR/help/drop.html">drop</a>()</code>,
<code><a href="../../SparkR/help/dtypes.html">dtypes</a>()</code>,
<code><a href="../../SparkR/help/exceptAll.html">exceptAll</a>()</code>,
<code><a href="../../SparkR/help/except.html">except</a>()</code>,
<code><a href="../../SparkR/help/explain.html">explain</a>()</code>,
<code><a href="../../SparkR/help/filter.html">filter</a>()</code>,
<code><a href="../../SparkR/help/first.html">first</a>()</code>,
<code><a href="../../SparkR/help/gapplyCollect.html">gapplyCollect</a>()</code>,
<code><a href="../../SparkR/help/gapply.html">gapply</a>()</code>,
<code><a href="../../SparkR/help/getNumPartitions.html">getNumPartitions</a>()</code>,
<code><a href="../../SparkR/help/group_by.html">group_by</a>()</code>,
<code><a href="../../SparkR/help/head.html">head</a>()</code>,
<code><a href="../../SparkR/help/hint.html">hint</a>()</code>,
<code><a href="../../SparkR/help/histogram.html">histogram</a>()</code>,
<code><a href="../../SparkR/help/insertInto.html">insertInto</a>()</code>,
<code><a href="../../SparkR/help/intersectAll.html">intersectAll</a>()</code>,
<code><a href="../../SparkR/help/intersect.html">intersect</a>()</code>,
<code><a href="../../SparkR/help/isLocal.html">isLocal</a>()</code>,
<code><a href="../../SparkR/help/isStreaming.html">isStreaming</a>()</code>,
<code><a href="../../SparkR/help/join.html">join</a>()</code>,
<code><a href="../../SparkR/help/limit.html">limit</a>()</code>,
<code><a href="../../SparkR/help/localCheckpoint.html">localCheckpoint</a>()</code>,
<code><a href="../../SparkR/help/mutate.html">mutate</a>()</code>,
<code><a href="../../SparkR/help/ncol.html">ncol</a>()</code>,
<code><a href="../../SparkR/help/nrow.html">nrow</a>()</code>,
<code><a href="../../SparkR/help/persist.html">persist</a>()</code>,
<code><a href="../../SparkR/help/printSchema.html">printSchema</a>()</code>,
<code><a href="../../SparkR/help/randomSplit.html">randomSplit</a>()</code>,
<code><a href="../../SparkR/help/rbind.html">rbind</a>()</code>,
<code><a href="../../SparkR/help/rename.html">rename</a>()</code>,
<code><a href="../../SparkR/help/repartitionByRange.html">repartitionByRange</a>()</code>,
<code><a href="../../SparkR/help/repartition.html">repartition</a>()</code>,
<code><a href="../../SparkR/help/rollup.html">rollup</a>()</code>,
<code><a href="../../SparkR/help/sample.html">sample</a>()</code>,
<code><a href="../../SparkR/help/saveAsTable.html">saveAsTable</a>()</code>,
<code><a href="../../SparkR/help/schema.html">schema</a>()</code>,
<code><a href="../../SparkR/help/selectExpr.html">selectExpr</a>()</code>,
<code><a href="../../SparkR/help/select.html">select</a>()</code>,
<code><a href="../../SparkR/help/showDF.html">showDF</a>()</code>,
<code><a href="../../SparkR/help/show.html">show</a>()</code>,
<code><a href="../../SparkR/help/storageLevel.html">storageLevel</a>()</code>,
<code><a href="../../SparkR/help/str.html">str</a>()</code>,
<code><a href="../../SparkR/help/subset.html">subset</a>()</code>,
<code><a href="../../SparkR/help/summary.html">summary</a>()</code>,
<code><a href="../../SparkR/help/take.html">take</a>()</code>,
<code><a href="../../SparkR/help/toJSON.html">toJSON</a>()</code>,
<code><a href="../../SparkR/help/unionAll.html">unionAll</a>()</code>,
<code><a href="../../SparkR/help/unionByName.html">unionByName</a>()</code>,
<code><a href="../../SparkR/help/union.html">union</a>()</code>,
<code><a href="../../SparkR/help/unpersist.html">unpersist</a>()</code>,
<code><a href="../../SparkR/help/withColumn.html">withColumn</a>()</code>,
<code><a href="../../SparkR/help/withWatermark.html">withWatermark</a>()</code>,
<code><a href="../../SparkR/help/with.html">with</a>()</code>,
<code><a href="../../SparkR/help/write.df.html">write.df</a>()</code>,
<code><a href="../../SparkR/help/write.jdbc.html">write.jdbc</a>()</code>,
<code><a href="../../SparkR/help/write.json.html">write.json</a>()</code>,
<code><a href="../../SparkR/help/write.orc.html">write.orc</a>()</code>,
<code><a href="../../SparkR/help/write.parquet.html">write.parquet</a>()</code>,
<code><a href="../../SparkR/help/write.stream.html">write.stream</a>()</code>,
<code><a href="../../SparkR/help/write.text.html">write.text</a>()</code>
</p>
<h3>Examples</h3>
<pre><code class="r">## Not run:
##D sparkR.session()
##D df1 &lt;- read.json(path)
##D df2 &lt;- read.json(path2)
##D merge(df1, df2) # Performs an inner join by common columns
##D merge(df1, df2, by = &quot;col1&quot;) # Performs an inner join based on expression
##D merge(df1, df2, by.x = &quot;col1&quot;, by.y = &quot;col2&quot;, all.y = TRUE)
##D merge(df1, df2, by.x = &quot;col1&quot;, by.y = &quot;col2&quot;, all.x = TRUE)
##D merge(df1, df2, by.x = &quot;col1&quot;, by.y = &quot;col2&quot;, all.x = TRUE, all.y = TRUE)
##D merge(df1, df2, by.x = &quot;col1&quot;, by.y = &quot;col2&quot;, all = TRUE, sort = FALSE)
##D merge(df1, df2, by = &quot;col1&quot;, all = TRUE, suffixes = c(&quot;-X&quot;, &quot;-Y&quot;))
##D merge(df1, df2, by = NULL) # Performs a Cartesian join
## End(Not run)
</code></pre>
<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 3.2.2 <a href="00Index.html">Index</a>]</div>
</div>
</body></html>