| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>apache_beam.runners.interactive.interactive_beam module — Apache Beam documentation</title> |
| |
| |
| |
| |
| |
| |
| |
| |
| <script type="text/javascript" src="_static/js/modernizr.min.js"></script> |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script> |
| <script type="text/javascript" src="_static/jquery.js"></script> |
| <script type="text/javascript" src="_static/underscore.js"></script> |
| <script type="text/javascript" src="_static/doctools.js"></script> |
| <script type="text/javascript" src="_static/language_data.js"></script> |
| <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| |
| <script type="text/javascript" src="_static/js/theme.js"></script> |
| |
| |
| |
| |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| <link rel="next" title="apache_beam.runners.interactive.interactive_environment module" href="apache_beam.runners.interactive.interactive_environment.html" /> |
| <link rel="prev" title="apache_beam.runners.interactive.cache_manager module" href="apache_beam.runners.interactive.cache_manager.html" /> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> Apache Beam |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li> |
| <li class="toctree-l1 current"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="reference internal" href="apache_beam.runners.html#subpackages">Subpackages</a><ul class="current"> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dataflow.html">apache_beam.runners.dataflow package</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.direct.html">apache_beam.runners.direct package</a></li> |
| <li class="toctree-l3 current"><a class="reference internal" href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a><ul class="current"> |
| <li class="toctree-l4"><a class="reference internal" href="apache_beam.runners.interactive.html#subpackages">Subpackages</a></li> |
| <li class="toctree-l4 current"><a class="reference internal" href="apache_beam.runners.interactive.html#submodules">Submodules</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.job.html">apache_beam.runners.job package</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="apache_beam.runners.html#submodules">Submodules</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">Apache Beam</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="index.html">Docs</a> »</li> |
| |
| <li><a href="apache_beam.runners.html">apache_beam.runners package</a> »</li> |
| |
| <li><a href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a> »</li> |
| |
| <li>apache_beam.runners.interactive.interactive_beam module</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="_sources/apache_beam.runners.interactive.interactive_beam.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="module-apache_beam.runners.interactive.interactive_beam"> |
| <span id="apache-beam-runners-interactive-interactive-beam-module"></span><h1>apache_beam.runners.interactive.interactive_beam module<a class="headerlink" href="#module-apache_beam.runners.interactive.interactive_beam" title="Permalink to this headline">¶</a></h1> |
| <p>Module of Interactive Beam features that can be used in a notebook.</p> |
| <p>The purpose of the module is to reduce the learning curve for Interactive Beam
users, provide a single place for importing, and add syntactic sugar for all
Interactive Beam components. It gives users the capability to interact with the existing
environment/session/context for Interactive Beam and visualize PCollections as
bounded datasets. At the same time, it hides the interactivity implementation
from users so that they can focus on developing Beam pipelines without worrying
about how hidden states in the interactive session are managed.</p> |
| <p>Note: If you want backward-compatibility, only invoke interfaces provided by |
| this module in your notebook or application code.</p> |
| <dl class="class"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options"> |
| <em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Options</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Options"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.runners.interactive.options.interactive_options.html#apache_beam.runners.interactive.options.interactive_options.InteractiveOptions" title="apache_beam.runners.interactive.options.interactive_options.InteractiveOptions"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.runners.interactive.options.interactive_options.InteractiveOptions</span></code></a></p> |
| <p>Options that guide how Interactive Beam works.</p> |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay"> |
| <code class="descname">enable_recording_replay</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Whether replayable source data recorded should be replayed for multiple |
| PCollection evaluations and pipeline runs as long as the data recorded is |
| still valid.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recordable_sources"> |
| <code class="descname">recordable_sources</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recordable_sources" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Interactive Beam automatically records data from sources in this set.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_duration"> |
| <code class="descname">recording_duration</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_duration" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The data recording of sources ends as soon as the background source |
| recording job has run for this long.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit"> |
| <code class="descname">recording_size_limit</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The data recording of sources ends as soon as the size (in bytes) of data |
| recorded from recordable sources reaches the limit.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format"> |
| <code class="descname">display_timestamp_format</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The format in which timestamps are displayed.</p> |
| <p>Default is ‘%Y-%m-%d %H:%M:%S.%f%z’, e.g. 2020-02-01 15:05:06.000015-08:00.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timezone"> |
| <code class="descname">display_timezone</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timezone" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The timezone in which timestamps are displayed.</p> |
| <p>Defaults to local timezone.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings"> |
| <em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Recordings</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.9)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>An introspection interface for recordings for pipelines.</p> |
| <p>When a user materializes a PCollection onto disk (e.g. ib.show) for a streaming
| pipeline, a background source recording job is started. This job pulls data |
| from all defined unbounded sources for that PCollection’s pipeline. The |
| following methods allow for introspection into that background recording job.</p> |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.describe"> |
| <code class="descname">describe</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.describe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.describe" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a description of all the recordings for the given pipeline.</p> |
| <p>If no pipeline is given then this returns a dictionary of descriptions for |
| all pipelines.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.clear"> |
| <code class="descname">clear</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.clear"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.clear" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears all recordings of the given pipeline. Returns True if cleared.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.stop"> |
| <code class="descname">stop</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.stop"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.stop" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stops the background source recording of the given pipeline.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.record"> |
| <code class="descname">record</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.record"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.record" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Starts a background source recording job for the given pipeline. Returns |
| True if the recording job was started.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.watch"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">watch</code><span class="sig-paren">(</span><em>watchable</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#watch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.watch" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Monitors a watchable.</p> |
| <p>This allows Interactive Beam to implicitly pass on the information about the |
| location of your pipeline definition.</p> |
| <p>Current implementation mainly watches for PCollection variables defined in |
| user code. A watchable can be a dictionary of variable metadata such as |
| locals(), a str name of a module, a module object or an instance of a class. |
| The variable can come from any scope even local variables in a method of a |
| class defined in a module.</p> |
| <blockquote> |
| <div><p>Below are all valid:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">watch</span><span class="p">(</span><span class="n">__main__</span><span class="p">)</span> <span class="c1"># if import __main__ is already invoked</span> |
| <span class="n">watch</span><span class="p">(</span><span class="s1">'__main__'</span><span class="p">)</span> <span class="c1"># does not require invoking import __main__ beforehand</span> |
| <span class="n">watch</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="c1"># inside a class</span> |
| <span class="n">watch</span><span class="p">(</span><span class="n">SomeInstance</span><span class="p">())</span> <span class="c1"># an instance of a class</span> |
| <span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span> <span class="c1"># inside a function, watching local variables within</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| <p>If you write a Beam pipeline in the __main__ module directly, since the |
| __main__ module is always watched, you don’t have to instruct Interactive |
| Beam. If your Beam pipeline is defined in some module other than __main__, |
| such as inside a class function or a unit test, you can watch() the scope.</p> |
| <blockquote> |
| <div><p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Foo</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
  <span class="k">def</span> <span class="nf">run_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
    <span class="k">with</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span>
      <span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init Create'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span>
      <span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span>
    <span class="k">return</span> <span class="n">init_pcoll</span>
<span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">Foo</span><span class="p">()</span><span class="o">.</span><span class="n">run_pipeline</span><span class="p">()</span>
| </pre></div> |
| </div> |
| <p>Interactive Beam caches init_pcoll for the first run.</p> |
| <p>Then you can use:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">show</span><span class="p">(</span><span class="n">init_pcoll</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>To visualize data from init_pcoll once the pipeline is executed.</p> |
| </div></blockquote> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.show"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">show</code><span class="sig-paren">(</span><em>*pcolls</em>, <em>include_window_info=False</em>, <em>visualize_data=False</em>, <em>n='inf'</em>, <em>duration='inf'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#show"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.show" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shows the given PCollections in an interactive, exploratory way if used within
a notebook, or prints a sample of the leading data if used within an IPython shell.
No-op if used in a non-interactive environment.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>include_window_info</strong> – (optional) if True, windowing information of the |
| data will be visualized too. Default is false.</li> |
<li><strong>visualize_data</strong> – (optional) by default, the visualization contains data
tables rendering data from the given pcolls separately, as if they were
converted into dataframes. If visualize_data is True, there will also be a
dive-in widget and a statistical overview widget of the data.
Otherwise, those two data visualization widgets will not be displayed.</li>
| <li><strong>n</strong> – (optional) max number of elements to visualize. Default ‘inf’.</li> |
| <li><strong>duration</strong> – (optional) max duration of elements to read in integer seconds or |
| a string duration. Default ‘inf’.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p>The given pcolls can be a dictionary of PCollections (as values), an iterable
of PCollections, or plain PCollection values.</p>
| <p>The user can specify either the max number of elements with <cite>n</cite> to read |
| or the maximum duration of elements to read with <cite>duration</cite>. When a limiter is |
| not supplied, it is assumed to be infinite.</p> |
<p>By default, the visualization contains data tables rendering data from the given
pcolls separately, as if they were converted into dataframes. If visualize_data
is True, there will also be a dive-in widget and a statistical overview widget
of the data. Otherwise, those two data visualization widgets will not be
displayed.</p>
<p>This function builds an ad hoc pipeline fragment including only the transforms that are
necessary to produce data for the given PCollections pcolls, runs the pipeline
fragment to compute data for those pcolls, and then visualizes the data.</p>
<p>The function is always blocking. If used within a notebook, the data
visualized might be dynamically updated before the function returns, as more
and more data could be processed and emitted while the pipeline fragment
is being executed. If used within an IPython shell, there will be no dynamic
plotting, only a static plot at the end of the pipeline fragment execution.</p>
| <p>The PCollections given must belong to the same pipeline.</p> |
| <blockquote> |
| <div><p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">p</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="n">InteractiveRunner</span><span class="p">())</span> |
| <span class="n">init</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">))</span> |
| <span class="n">square</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Square'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> |
| <span class="n">cube</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Cube'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">3</span><span class="p">)</span> |
| |
| <span class="c1"># Below builds a pipeline fragment from the defined pipeline `p` that</span> |
| <span class="c1"># contains only applied transforms of `Init` and `Square`. Then the</span> |
| <span class="c1"># interactive runner runs the pipeline fragment implicitly to compute data</span> |
| <span class="c1"># represented by PCollection `square` and visualizes it.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">square</span><span class="p">)</span> |
| |
| <span class="c1"># This is equivalent to `show(square)` because `square` depends on `init`</span> |
| <span class="c1"># and `init` is included in the pipeline fragment and computed anyway.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">init</span><span class="p">,</span> <span class="n">square</span><span class="p">)</span> |
| |
| <span class="c1"># Below is similar to running `p.run()`. It computes data for both</span> |
| <span class="c1"># PCollection `square` and PCollection `cube`, then visualizes them.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">square</span><span class="p">,</span> <span class="n">cube</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.collect"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">collect</code><span class="sig-paren">(</span><em>pcoll</em>, <em>n='inf'</em>, <em>duration='inf'</em>, <em>include_window_info=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#collect"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.collect" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Materializes the elements from a PCollection into a Dataframe.</p> |
| <p>This reads each element from file and reads only the amount that it needs |
| into memory. The user can specify either the max number of elements to read |
| or the maximum duration of elements to read. When a limiter is not supplied, |
| it is assumed to be infinite.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>n</strong> – (optional) max number of elements to read. Default ‘inf’.</li>
| <li><strong>duration</strong> – (optional) max duration of elements to read in integer seconds or |
| a string duration. Default ‘inf’.</li> |
| <li><strong>include_window_info</strong> – (optional) if True, appends the windowing information |
| to each row. Default False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">p</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="n">InteractiveRunner</span><span class="p">())</span> |
| <span class="n">init</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span> |
| <span class="n">square</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Square'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> |
| |
| <span class="c1"># Run the pipeline and bring the PCollection into memory as a Dataframe.</span> |
<span class="n">in_memory_square</span> <span class="o">=</span> <span class="n">collect</span><span class="p">(</span><span class="n">square</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.show_graph"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">show_graph</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#show_graph"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.show_graph" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shows the current pipeline shape of a given Beam pipeline as a DAG.</p> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.evict_recorded_data"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">evict_recorded_data</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#evict_recorded_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.evict_recorded_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Forcefully evicts all recorded replayable data for the given pipeline. If |
| no pipeline is specified, evicts for all user defined pipelines.</p> |
| <p>Once invoked, Interactive Beam will record new data based on the guidance of |
| options the next time it evaluates/visualizes PCollections or runs pipelines.</p> |
| </dd></dl> |
| |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="apache_beam.runners.interactive.interactive_environment.html" class="btn btn-neutral float-right" title="apache_beam.runners.interactive.interactive_environment module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="apache_beam.runners.interactive.cache_manager.html" class="btn btn-neutral float-left" title="apache_beam.runners.interactive.cache_manager module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| © Copyright |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| |
| |
| |
| |
| |
| </body> |
| </html> |