blob: ea328a010f9e17c491b1b7ede876cc15193490c2 [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="The data source is specified by the source and a set of options (...).
If source is not specified, the default data source configured by
spark.sql.sources.default will be used."><!-- Inform modern browsers that this page supports both dark and light color schemes,
and the page author prefers light. --><meta name="color-scheme" content="dark light"><script>
// Fallback for browsers that do not support the `prefers-color-scheme`
// media feature. `matchMedia` reports a query it cannot parse back as
// "not all", which is what the check below detects.
// The themed stylesheets later in <head> are gated on `prefers-color-scheme`
// media attributes, so in such a browser none of them would apply. In this
// case, inject the `light` (Flatly) CSS before the others, with no media
// filter, so that it is always applied and is requested as early as possible.
if (window.matchMedia("(prefers-color-scheme: dark)").media === "not all") {
// Hide the whole document while the fallback stylesheet is loading.
document.documentElement.style.display = "none";
document.head.insertAdjacentHTML(
"beforeend",
// The link's `onload` handler clears the inline `display` value again,
// revealing the page once the stylesheet has loaded.
"<link id=\"css\" rel=\"stylesheet\" href=\"https://bootswatch.com/5/flatly/bootstrap.css\" onload=\"document.documentElement.style.display = ''\">"
);
}
</script><title>Write the streaming SparkDataFrame to a data source. — write.stream • SparkR</title><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" 
crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet"><meta property="og:title" content="Write the streaming SparkDataFrame to a data source. — write.stream"><meta property="og:description" content="The data source is specified by the source and a set of options (...).
If source is not specified, the default data source configured by
spark.sql.sources.default will be used."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Flatly Theme - Light --><link id="css-light" rel="stylesheet" href="https://bootswatch.com/5/flatly/bootstrap.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><!-- Darkly Theme - Dark --><link id="css-dark" rel="stylesheet" href="https://bootswatch.com/5/darkly/bootstrap.css" media="(prefers-color-scheme: dark)"><!-- preferably CSS --><link rel="stylesheet" href="../preferably.css"><link id="css-code-light" rel="stylesheet" href="../code-color-scheme-light.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><link id="css-code-dark" rel="stylesheet" href="../code-color-scheme-dark.css" media="(prefers-color-scheme: dark)"><script src="../darkswitch.js"></script></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-primary"><div class="container">
<a class="external-link navbar-brand" href="https://spark.apache.org/">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" alt="Apache Spark" max-height="100%"></a>
<a class="navbar-brand me-2" href="../index.html">SparkR</a>
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">4.0.0</small>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-2">
<ul class="navbar-nav me-auto"><li class="active nav-item">
<a class="nav-link" href="../reference/index.html">Reference</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<a class="dropdown-item" href="../articles/sparkr-vignettes.html">SparkR - Practical Guide</a>
</div>
</li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
<ul class="navbar-nav"><li>
<a class="external-link nav-link" id="css-toggle-btn" aria-label="Toggle color scheme">
<span class="fas fa fas fa-adjust fa-lg"></span>
</a>
</li>
</ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<img src="" class="logo" alt=""><h1>Write the streaming SparkDataFrame to a data source.</h1>
<div class="d-none name"><code>write.stream.Rd</code></div>
</div>
<div class="ref-description section level2">
<p>The data source is specified by the <code>source</code> and a set of options (...).
If <code>source</code> is not specified, the default data source configured by
spark.sql.sources.default will be used.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">write.stream</span><span class="op">(</span><span class="va">df</span>, source <span class="op">=</span> <span class="cn">NULL</span>, outputMode <span class="op">=</span> <span class="cn">NULL</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for SparkDataFrame</span></span>
<span><span class="fu">write.stream</span><span class="op">(</span></span>
<span> <span class="va">df</span>,</span>
<span> source <span class="op">=</span> <span class="cn">NULL</span>,</span>
<span> outputMode <span class="op">=</span> <span class="cn">NULL</span>,</span>
<span> partitionBy <span class="op">=</span> <span class="cn">NULL</span>,</span>
<span> trigger.processingTime <span class="op">=</span> <span class="cn">NULL</span>,</span>
<span> trigger.once <span class="op">=</span> <span class="cn">NULL</span>,</span>
<span> <span class="va">...</span></span>
<span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt>df</dt>
<dd><p>a streaming SparkDataFrame.</p></dd>
<dt>source</dt>
<dd><p>a name for external data source.</p></dd>
<dt>outputMode</dt>
<dd><p>one of 'append', 'complete', 'update'.</p></dd>
<dt>...</dt>
<dd><p>additional external data source specific named options.</p></dd>
<dt>partitionBy</dt>
<dd><p>a name or a list of names of columns to partition the output by on the file
system. If specified, the output is laid out on the file system similar to Hive's
partitioning scheme.</p></dd>
<dt>trigger.processingTime</dt>
<dd><p>a processing time interval as a string, e.g. '5 seconds',
'1 minute'. This is a trigger that runs a query periodically based on the processing
time. If value is '0 seconds', the query will run as fast as possible; this is the
default. Only one trigger can be set.</p></dd>
<dt>trigger.once</dt>
<dd><p>a logical, must be set to <code>TRUE</code>. This is a trigger that processes only
one batch of data in a streaming query then terminates the query. Only one trigger can be
set.</p></dd>
</dl></div>
<div class="section level2">
<h2 id="details">Details<a class="anchor" aria-label="anchor" href="#details"></a></h2>
<p>Additionally, <code>outputMode</code> specifies how data of a streaming SparkDataFrame is written to an
output data source. There are three modes:</p><ul><li><p>append: Only the new rows in the streaming SparkDataFrame will be written out. This
output mode can only be used in queries that do not contain any aggregation.</p></li>
<li><p>complete: All the rows in the streaming SparkDataFrame will be written out every time
there are some updates. This output mode can only be used in queries that
contain aggregations.</p></li>
<li><p>update: Only the rows that were updated in the streaming SparkDataFrame will be written
out every time there are some updates. If the query doesn't contain aggregations,
it will be equivalent to <code>append</code> mode.</p></li>
</ul></div>
<div class="section level2">
<h2 id="note">Note<a class="anchor" aria-label="anchor" href="#note"></a></h2>
<p>write.stream since 2.2.0</p>
<p>experimental</p>
</div>
<div class="section level2">
<h2 id="see-also">See also<a class="anchor" aria-label="anchor" href="#see-also"></a></h2>
<div class="dont-index"><p><a href="read.stream.html">read.stream</a></p>
<p>Other SparkDataFrame functions:
<code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>,
<code><a href="summarize.html">agg</a>()</code>,
<code><a href="alias.html">alias</a>()</code>,
<code><a href="arrange.html">arrange</a>()</code>,
<code><a href="as.data.frame.html">as.data.frame</a>()</code>,
<code><a href="attach.html">attach,SparkDataFrame-method</a></code>,
<code><a href="broadcast.html">broadcast</a>()</code>,
<code><a href="cache.html">cache</a>()</code>,
<code><a href="checkpoint.html">checkpoint</a>()</code>,
<code><a href="coalesce.html">coalesce</a>()</code>,
<code><a href="collect.html">collect</a>()</code>,
<code><a href="columns.html">colnames</a>()</code>,
<code><a href="coltypes.html">coltypes</a>()</code>,
<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a>()</code>,
<code><a href="crossJoin.html">crossJoin</a>()</code>,
<code><a href="cube.html">cube</a>()</code>,
<code><a href="dapplyCollect.html">dapplyCollect</a>()</code>,
<code><a href="dapply.html">dapply</a>()</code>,
<code><a href="describe.html">describe</a>()</code>,
<code><a href="dim.html">dim</a>()</code>,
<code><a href="distinct.html">distinct</a>()</code>,
<code><a href="dropDuplicates.html">dropDuplicates</a>()</code>,
<code><a href="nafunctions.html">dropna</a>()</code>,
<code><a href="drop.html">drop</a>()</code>,
<code><a href="dtypes.html">dtypes</a>()</code>,
<code><a href="exceptAll.html">exceptAll</a>()</code>,
<code><a href="except.html">except</a>()</code>,
<code><a href="explain.html">explain</a>()</code>,
<code><a href="filter.html">filter</a>()</code>,
<code><a href="first.html">first</a>()</code>,
<code><a href="gapplyCollect.html">gapplyCollect</a>()</code>,
<code><a href="gapply.html">gapply</a>()</code>,
<code><a href="getNumPartitions.html">getNumPartitions</a>()</code>,
<code><a href="groupBy.html">group_by</a>()</code>,
<code><a href="head.html">head</a>()</code>,
<code><a href="hint.html">hint</a>()</code>,
<code><a href="histogram.html">histogram</a>()</code>,
<code><a href="insertInto.html">insertInto</a>()</code>,
<code><a href="intersectAll.html">intersectAll</a>()</code>,
<code><a href="intersect.html">intersect</a>()</code>,
<code><a href="isLocal.html">isLocal</a>()</code>,
<code><a href="isStreaming.html">isStreaming</a>()</code>,
<code><a href="join.html">join</a>()</code>,
<code><a href="limit.html">limit</a>()</code>,
<code><a href="localCheckpoint.html">localCheckpoint</a>()</code>,
<code><a href="merge.html">merge</a>()</code>,
<code><a href="mutate.html">mutate</a>()</code>,
<code><a href="ncol.html">ncol</a>()</code>,
<code><a href="nrow.html">nrow</a>()</code>,
<code><a href="persist.html">persist</a>()</code>,
<code><a href="printSchema.html">printSchema</a>()</code>,
<code><a href="randomSplit.html">randomSplit</a>()</code>,
<code><a href="rbind.html">rbind</a>()</code>,
<code><a href="rename.html">rename</a>()</code>,
<code><a href="repartitionByRange.html">repartitionByRange</a>()</code>,
<code><a href="repartition.html">repartition</a>()</code>,
<code><a href="rollup.html">rollup</a>()</code>,
<code><a href="sample.html">sample</a>()</code>,
<code><a href="saveAsTable.html">saveAsTable</a>()</code>,
<code><a href="schema.html">schema</a>()</code>,
<code><a href="selectExpr.html">selectExpr</a>()</code>,
<code><a href="select.html">select</a>()</code>,
<code><a href="showDF.html">showDF</a>()</code>,
<code><a href="show.html">show</a>()</code>,
<code><a href="storageLevel.html">storageLevel</a>()</code>,
<code><a href="str.html">str</a>()</code>,
<code><a href="subset.html">subset</a>()</code>,
<code><a href="summary.html">summary</a>()</code>,
<code><a href="take.html">take</a>()</code>,
<code><a href="toJSON.html">toJSON</a>()</code>,
<code><a href="unionAll.html">unionAll</a>()</code>,
<code><a href="unionByName.html">unionByName</a>()</code>,
<code><a href="union.html">union</a>()</code>,
<code><a href="unpersist.html">unpersist</a>()</code>,
<code><a href="unpivot.html">unpivot</a>()</code>,
<code><a href="withColumn.html">withColumn</a>()</code>,
<code><a href="withWatermark.html">withWatermark</a>()</code>,
<code><a href="with.html">with</a>()</code>,
<code><a href="write.df.html">write.df</a>()</code>,
<code><a href="write.jdbc.html">write.jdbc</a>()</code>,
<code><a href="write.json.html">write.json</a>()</code>,
<code><a href="write.orc.html">write.orc</a>()</code>,
<code><a href="write.parquet.html">write.parquet</a>()</code>,
<code><a href="write.text.html">write.text</a>()</code></p></div>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="kw">if</span> <span class="op">(</span><span class="cn">FALSE</span><span class="op">)</span> <span class="op">{</span></span></span>
<span class="r-in"><span><span class="fu"><a href="sparkR.session.html">sparkR.session</a></span><span class="op">(</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="read.stream.html">read.stream</a></span><span class="op">(</span><span class="st">"socket"</span>, host <span class="op">=</span> <span class="st">"localhost"</span>, port <span class="op">=</span> <span class="fl">9999</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="fu"><a href="isStreaming.html">isStreaming</a></span><span class="op">(</span><span class="va">df</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">wordCounts</span> <span class="op">&lt;-</span> <span class="fu"><a href="count.html">count</a></span><span class="op">(</span><span class="fu"><a href="groupBy.html">group_by</a></span><span class="op">(</span><span class="va">df</span>, <span class="st">"value"</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="co"># console</span></span></span>
<span class="r-in"><span><span class="va">q</span> <span class="op">&lt;-</span> <span class="fu">write.stream</span><span class="op">(</span><span class="va">wordCounts</span>, <span class="st">"console"</span>, outputMode <span class="op">=</span> <span class="st">"complete"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="co"># text stream</span></span></span>
<span class="r-in"><span><span class="va">q</span> <span class="op">&lt;-</span> <span class="fu">write.stream</span><span class="op">(</span><span class="va">df</span>, <span class="st">"text"</span>, path <span class="op">=</span> <span class="st">"/home/user/out"</span>, checkpointLocation <span class="op">=</span> <span class="st">"/home/user/cp"</span>,</span></span>
<span class="r-in"><span> partitionBy <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"year"</span>, <span class="st">"month"</span><span class="op">)</span>, trigger.processingTime <span class="op">=</span> <span class="st">"30 seconds"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="co"># memory stream</span></span></span>
<span class="r-in"><span><span class="va">q</span> <span class="op">&lt;-</span> <span class="fu">write.stream</span><span class="op">(</span><span class="va">wordCounts</span>, <span class="st">"memory"</span>, queryName <span class="op">=</span> <span class="st">"outs"</span>, outputMode <span class="op">=</span> <span class="st">"complete"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="fu"><a href="head.html">head</a></span><span class="op">(</span><span class="fu"><a href="sql.html">sql</a></span><span class="op">(</span><span class="st">"SELECT * from outs"</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="fu"><a href="queryName.html">queryName</a></span><span class="op">(</span><span class="va">q</span><span class="op">)</span></span></span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="fu"><a href="stopQuery.html">stopQuery</a></span><span class="op">(</span><span class="va">q</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="op">}</span></span></span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="copyright">
<p></p><p>Developed by <a href="https://www.apache.org/" class="external-link"> The Apache Software Foundation</a>.</p>
</div>
<div class="pkgdown">
<p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.9.</p>
<p class="preferably">Using <a href="https://preferably.amirmasoudabdol.name/?source=footer" class="external-link">preferably</a> template.</p>
</div>
</footer></div>
</body></html>