<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>pyspark.sql.streaming.DataStreamWriter.toTable &#8212; PySpark 3.3.4 documentation</title>
<link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamWriter.toTable.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="pyspark.sql.streaming.DataStreamWriter.trigger" href="pyspark.sql.streaming.DataStreamWriter.trigger.html" />
<link rel="prev" title="pyspark.sql.streaming.DataStreamWriter.start" href="pyspark.sql.streaming.DataStreamWriter.start.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../user_guide/index.html">User Guide</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../migration_guide/index.html">Migration Guide</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="">
<a href="../../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="active">
<a href="../index.html">Structured Streaming</a>
<ul>
<li class="">
<a href="../core_classes.html">Core Classes</a>
</li>
<li class="active">
<a href="../io.html">Input/Output</a>
</li>
<li class="">
<a href="../query_management.html">Query Management</a>
</li>
</ul>
</li>
<li class="">
<a href="../../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../../pyspark.streaming.html">Spark Streaming</a>
</li>
<li class="">
<a href="../../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../../pyspark.resource.html">Resource Management</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="pyspark-sql-streaming-datastreamwriter-totable">
<h1>pyspark.sql.streaming.DataStreamWriter.toTable<a class="headerlink" href="#pyspark-sql-streaming-datastreamwriter-totable" title="Permalink to this headline"></a></h1>
<dl class="py method">
<dt id="pyspark.sql.streaming.DataStreamWriter.toTable">
<code class="sig-prename descclassname">DataStreamWriter.</code><code class="sig-name descname">toTable</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">tableName</span><span class="p">:</span> <span class="n">str</span></em>, <em class="sig-param"><span class="n">format</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>str<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">outputMode</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>str<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">partitionBy</span><span class="p">:</span> <span class="n">Union[str, List[str], None]</span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">queryName</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>str<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="n">OptionalPrimitiveType</span></em><span class="sig-paren">)</span> &#x2192; <a class="reference internal" href="pyspark.sql.streaming.StreamingQuery.html#pyspark.sql.streaming.StreamingQuery" title="pyspark.sql.streaming.StreamingQuery">pyspark.sql.streaming.StreamingQuery</a><a class="reference internal" href="../../../_modules/pyspark/sql/streaming.html#DataStreamWriter.toTable"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.sql.streaming.DataStreamWriter.toTable" title="Permalink to this definition"></a></dt>
<dd><p>Starts the execution of the streaming query, which will continually output results to the
given table as new data arrives.</p>
<p>The returned <a class="reference internal" href="pyspark.sql.streaming.StreamingQuery.html#pyspark.sql.streaming.StreamingQuery" title="pyspark.sql.streaming.StreamingQuery"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamingQuery</span></code></a> object can be used to interact with the stream.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 3.1.0.</span></p>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><dl class="simple">
<dt><strong>tableName</strong><span class="classifier">str</span></dt><dd><p>the name of the table to write to.</p>
</dd>
<dt><strong>format</strong><span class="classifier">str, optional</span></dt><dd><p>the format used to save the output.</p>
</dd>
<dt><strong>outputMode</strong><span class="classifier">str, optional</span></dt><dd><p>specifies how data of a streaming DataFrame/Dataset is written to a
streaming sink (a sketch of <cite>complete</cite> mode with an aggregation follows this parameter list).</p>
<ul class="simple">
<li><p><cite>append</cite>: Only the new rows in the streaming DataFrame/Dataset will be written to the
sink</p></li>
<li><p><cite>complete</cite>: All the rows in the streaming DataFrame/Dataset will be written to the
sink every time there are some updates</p></li>
<li><p><cite>update</cite>: Only the rows that were updated in the streaming DataFrame/Dataset will be
written to the sink every time there are some updates. If the query doesn&#8217;t contain
aggregations, it will be equivalent to <cite>append</cite> mode.</p></li>
</ul>
</dd>
<dt><strong>partitionBy</strong><span class="classifier">str or list, optional</span></dt><dd><p>names of partitioning columns</p>
</dd>
<dt><strong>queryName</strong><span class="classifier">str, optional</span></dt><dd><p>unique name for the query</p>
</dd>
<dt><strong>**options</strong><span class="classifier">dict</span></dt><dd><p>All other string options. You may want to provide a <cite>checkpointLocation</cite>.</p>
</dd>
</dl>
</dd>
</dl>
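<p>As an illustration of <cite>outputMode</cite>, the minimal sketch below aggregates the stream and writes it
in <cite>complete</cite> mode. The column name <cite>value</cite>, the table name <cite>counts_table</cite>, and
the checkpoint path are placeholders, not part of this API.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; agg_sdf = sdf.groupBy('value').count()   # running count per value
&gt;&gt;&gt; agg_sdf.writeStream.outputMode('complete').toTable(
...     'counts_table',
...     checkpointLocation='/tmp/checkpoint')
</pre></div>
</div>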
<p class="rubric">Notes</p>
<p>This API is evolving.</p>
<p>For a v1 table, the partitioning columns provided by <cite>partitionBy</cite> will be respected whether or not
the table exists. A new table will be created if the table does not exist.</p>
<p>For a v2 table, <cite>partitionBy</cite> will be ignored if the table already exists; it will be respected
only if the v2 table does not exist. In addition, a v2 table created by this API lacks some functionality
(e.g., custom properties, options, and serde info). If you need these, create the v2 table manually before
starting the query, so that you do not end up with a table created with incomplete information.</p>
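<p>For example, the table can be created up front with Spark SQL, with the desired properties, and then targeted
by the streaming write; whether it resolves to a v1 or v2 table depends on the catalog and data source in use.
The sketch below is a minimal illustration; the schema, the <cite>USING</cite> clause, and the table property are
placeholders to be adapted.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; spark.sql(
...     "CREATE TABLE IF NOT EXISTS output_table (value STRING) "
...     "USING parquet TBLPROPERTIES ('owner' = 'etl')")
&gt;&gt;&gt; sdf.writeStream.toTable(
...     'output_table', checkpointLocation='/tmp/checkpoint')
</pre></div>
</div>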
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sdf</span><span class="o">.</span><span class="n">writeStream</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s1">&#39;parquet&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">queryName</span><span class="p">(</span><span class="s1">&#39;query&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">toTable</span><span class="p">(</span><span class="s1">&#39;output_table&#39;</span><span class="p">)</span>
<span class="gp">... </span>
</pre></div>
</div>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">sdf</span><span class="o">.</span><span class="n">writeStream</span><span class="o">.</span><span class="n">trigger</span><span class="p">(</span><span class="n">processingTime</span><span class="o">=</span><span class="s1">&#39;5 seconds&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">toTable</span><span class="p">(</span>
<span class="gp">... </span> <span class="s1">&#39;output_table&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">queryName</span><span class="o">=</span><span class="s1">&#39;that_query&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">outputMode</span><span class="o">=</span><span class="s2">&quot;append&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="nb">format</span><span class="o">=</span><span class="s1">&#39;parquet&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">checkpointLocation</span><span class="o">=</span><span class="s1">&#39;/tmp/checkpoint&#39;</span><span class="p">)</span>
</pre></div>
</div>
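<p>The returned <cite>StreamingQuery</cite> handle can then be used to interact with the stream, for example to wait
for it or stop it. A minimal sketch reusing <cite>sdf</cite> from the examples above; the checkpoint path is a
placeholder.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span>&gt;&gt;&gt; query = sdf.writeStream.format('parquet').toTable(
...     'output_table', checkpointLocation='/tmp/checkpoint')
&gt;&gt;&gt; query.awaitTermination(5)   # block for up to 5 seconds
&gt;&gt;&gt; query.stop()                # stop the streaming query
</pre></div>
</div>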
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.sql.streaming.DataStreamWriter.start.html" title="previous page">pyspark.sql.streaming.DataStreamWriter.start</a>
<a class='right-next' id="next-link" href="pyspark.sql.streaming.DataStreamWriter.trigger.html" title="next page">pyspark.sql.streaming.DataStreamWriter.trigger</a>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>