blob: 4078d37186b5f6fe2b08cc625bf6e9990e75e3b0 [file] [log] [blame]
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.runners.interactive.interactive_beam module &mdash; Apache Beam documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.runners.interactive.interactive_environment module" href="apache_beam.runners.interactive.interactive_environment.html" />
<link rel="prev" title="apache_beam.runners.interactive.cache_manager module" href="apache_beam.runners.interactive.cache_manager.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.internal.html">apache_beam.internal package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.runners.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dataflow.html">apache_beam.runners.dataflow package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.direct.html">apache_beam.runners.direct package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="apache_beam.runners.interactive.html#subpackages">Subpackages</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.runners.interactive.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.internal.html">apache_beam.runners.internal package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.job.html">apache_beam.runners.job package</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="apache_beam.runners.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.tools.html">apache_beam.tools package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.version.html">apache_beam.version module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.runners.html">apache_beam.runners package</a> &raquo;</li>
<li><a href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a> &raquo;</li>
<li>apache_beam.runners.interactive.interactive_beam module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.runners.interactive.interactive_beam.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.runners.interactive.interactive_beam">
<span id="apache-beam-runners-interactive-interactive-beam-module"></span><h1>apache_beam.runners.interactive.interactive_beam module<a class="headerlink" href="#module-apache_beam.runners.interactive.interactive_beam" title="Permalink to this headline"></a></h1>
<p>Module of Interactive Beam features that can be used in a notebook.</p>
<p>The purpose of the module is to reduce the learning curve of Interactive Beam
users, provide a single place for importing, and add syntactic sugar for all
Interactive Beam components. It gives users the capability to interact with the existing
environment/session/context for Interactive Beam and visualize PCollections as
bounded datasets. At the same time, it hides the interactivity implementation
from users so that users can focus on developing Beam pipelines without worrying
about how hidden states in the interactive session are managed.</p>
<p>Note: If you want backward compatibility, only invoke interfaces provided by
this module in your notebook or application code.</p>
<dl class="class">
<dt id="apache_beam.runners.interactive.interactive_beam.Options">
<em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Options</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Options"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.runners.interactive.options.interactive_options.html#apache_beam.runners.interactive.options.interactive_options.InteractiveOptions" title="apache_beam.runners.interactive.options.interactive_options.InteractiveOptions"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.runners.interactive.options.interactive_options.InteractiveOptions</span></code></a></p>
<p>Options that guide how Interactive Beam works.</p>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay">
<code class="descname">enable_recording_replay</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay" title="Permalink to this definition"></a></dt>
<dd><p>Whether recorded replayable source data should be replayed for multiple
PCollection evaluations and pipeline runs as long as the recorded data is
still valid.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.recordable_sources">
<code class="descname">recordable_sources</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recordable_sources" title="Permalink to this definition"></a></dt>
<dd><p>Interactive Beam automatically records data from sources in this set.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_duration">
<code class="descname">recording_duration</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_duration" title="Permalink to this definition"></a></dt>
<dd><p>The data recording of sources ends as soon as the background source
recording job has run for this long.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit">
<code class="descname">recording_size_limit</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit" title="Permalink to this definition"></a></dt>
<dd><p>The data recording of sources ends as soon as the size (in bytes) of data
recorded from recordable sources reaches the limit.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format">
<code class="descname">display_timestamp_format</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format" title="Permalink to this definition"></a></dt>
<dd><p>The format in which timestamps are displayed.</p>
<p>Default is ‘%Y-%m-%d %H:%M:%S.%f%z’, e.g. 2020-02-01 15:05:06.000015-08:00.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timezone">
<code class="descname">display_timezone</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timezone" title="Permalink to this definition"></a></dt>
<dd><p>The timezone in which timestamps are displayed.</p>
<p>Defaults to local timezone.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.runners.interactive.interactive_beam.Recordings">
<em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Recordings</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.9)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>An introspection interface for recordings for pipelines.</p>
<p>When a user materializes a PCollection onto disk (e.g. ib.show) for a streaming
pipeline, a background source recording job is started. This job pulls data
from all defined unbounded sources for that PCollection’s pipeline. The
following methods allow for introspection into that background recording job.</p>
<dl class="method">
<dt id="apache_beam.runners.interactive.interactive_beam.Recordings.describe">
<code class="descname">describe</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.describe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.describe" title="Permalink to this definition"></a></dt>
<dd><p>Returns a description of all the recordings for the given pipeline.</p>
<p>If no pipeline is given then this returns a dictionary of descriptions for
all pipelines.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.runners.interactive.interactive_beam.Recordings.clear">
<code class="descname">clear</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.clear"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.clear" title="Permalink to this definition"></a></dt>
<dd><p>Clears all recordings of the given pipeline. Returns True if cleared.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.runners.interactive.interactive_beam.Recordings.stop">
<code class="descname">stop</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.stop"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.stop" title="Permalink to this definition"></a></dt>
<dd><p>Stops the background source recording of the given pipeline.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.runners.interactive.interactive_beam.Recordings.record">
<code class="descname">record</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.record"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.record" title="Permalink to this definition"></a></dt>
<dd><p>Starts a background source recording job for the given pipeline. Returns
True if the recording job was started.</p>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="apache_beam.runners.interactive.interactive_beam.watch">
<code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">watch</code><span class="sig-paren">(</span><em>watchable</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#watch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.watch" title="Permalink to this definition"></a></dt>
<dd><p>Monitors a watchable.</p>
<p>This allows Interactive Beam to implicitly pass on the information about the
location of your pipeline definition.</p>
<p>Current implementation mainly watches for PCollection variables defined in
user code. A watchable can be a dictionary of variable metadata such as
locals(), a str name of a module, a module object or an instance of a class.
The variable can come from any scope, even local variables in a method of a
class defined in a module.</p>
<blockquote>
<div><p>Below are all valid:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">watch</span><span class="p">(</span><span class="n">__main__</span><span class="p">)</span> <span class="c1"># if import __main__ is already invoked</span>
<span class="n">watch</span><span class="p">(</span><span class="s1">&#39;__main__&#39;</span><span class="p">)</span> <span class="c1"># does not require invoking import __main__ beforehand</span>
<span class="n">watch</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="c1"># inside a class</span>
<span class="n">watch</span><span class="p">(</span><span class="n">SomeInstance</span><span class="p">())</span> <span class="c1"># an instance of a class</span>
<span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span> <span class="c1"># inside a function, watching local variables within</span>
</pre></div>
</div>
</div></blockquote>
<p>If you write a Beam pipeline in the __main__ module directly, since the
__main__ module is always watched, you don’t have to instruct Interactive
Beam. If your Beam pipeline is defined in some module other than __main__,
such as inside a class function or a unit test, you can watch() the scope.</p>
<blockquote>
<div><p>For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Foo</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
  <span class="k">def</span> <span class="nf">run_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
    <span class="k">with</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span>
      <span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">&#39;Init Create&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span>
      <span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span>
    <span class="k">return</span> <span class="n">init_pcoll</span>
<span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">Foo</span><span class="p">()</span><span class="o">.</span><span class="n">run_pipeline</span><span class="p">()</span>
</pre></div>
</div>
<p>Interactive Beam caches init_pcoll for the first run.</p>
<p>Then you can use:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">show</span><span class="p">(</span><span class="n">init_pcoll</span><span class="p">)</span>
</pre></div>
</div>
<p>to visualize data from init_pcoll once the pipeline is executed.</p>
</div></blockquote>
</dd></dl>
<dl class="function">
<dt id="apache_beam.runners.interactive.interactive_beam.evict_recorded_data">
<code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">evict_recorded_data</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#evict_recorded_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.evict_recorded_data" title="Permalink to this definition"></a></dt>
<dd><p>Forcefully evicts all recorded replayable data for the given pipeline. If
no pipeline is specified, evicts for all user defined pipelines.</p>
<p>Once invoked, Interactive Beam will record new data based on the guidance of
options the next time it evaluates/visualizes PCollections or runs pipelines.</p>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="apache_beam.runners.interactive.interactive_environment.html" class="btn btn-neutral float-right" title="apache_beam.runners.interactive.interactive_environment module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="apache_beam.runners.interactive.cache_manager.html" class="btn btn-neutral float-left" title="apache_beam.runners.interactive.cache_manager module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>