| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>apache_beam.runners.interactive.interactive_beam module — Apache Beam 2.47.0 documentation</title> |
| |
| |
| |
| |
| |
| |
| |
| |
| <script type="text/javascript" src="_static/js/modernizr.min.js"></script> |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script> |
| <script type="text/javascript" src="_static/jquery.js"></script> |
| <script type="text/javascript" src="_static/underscore.js"></script> |
| <script type="text/javascript" src="_static/doctools.js"></script> |
| <script type="text/javascript" src="_static/language_data.js"></script> |
| <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| |
| <script type="text/javascript" src="_static/js/theme.js"></script> |
| |
| |
| |
| |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| <link rel="next" title="apache_beam.runners.interactive.interactive_environment module" href="apache_beam.runners.interactive.interactive_environment.html" /> |
| <link rel="prev" title="apache_beam.runners.interactive.cache_manager module" href="apache_beam.runners.interactive.cache_manager.html" /> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> Apache Beam |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 2.47.0 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li> |
| <li class="toctree-l1 current"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="reference internal" href="apache_beam.runners.html#subpackages">Subpackages</a><ul class="current"> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dask.html">apache_beam.runners.dask package</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dataflow.html">apache_beam.runners.dataflow package</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.direct.html">apache_beam.runners.direct package</a></li> |
| <li class="toctree-l3 current"><a class="reference internal" href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a><ul class="current"> |
| <li class="toctree-l4"><a class="reference internal" href="apache_beam.runners.interactive.html#subpackages">Subpackages</a></li> |
| <li class="toctree-l4 current"><a class="reference internal" href="apache_beam.runners.interactive.html#submodules">Submodules</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.job.html">apache_beam.runners.job package</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="apache_beam.runners.html#submodules">Submodules</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">Apache Beam</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="index.html">Docs</a> »</li> |
| |
| <li><a href="apache_beam.runners.html">apache_beam.runners package</a> »</li> |
| |
| <li><a href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a> »</li> |
| |
| <li>apache_beam.runners.interactive.interactive_beam module</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="_sources/apache_beam.runners.interactive.interactive_beam.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="module-apache_beam.runners.interactive.interactive_beam"> |
| <span id="apache-beam-runners-interactive-interactive-beam-module"></span><h1>apache_beam.runners.interactive.interactive_beam module<a class="headerlink" href="#module-apache_beam.runners.interactive.interactive_beam" title="Permalink to this headline">¶</a></h1> |
| <p>Module of Interactive Beam features that can be used in a notebook.</p> |
| <p>The purpose of the module is to reduce the learning curve of Interactive Beam |
| users, provide a single place for importing and add syntactic sugar for all |
| Interactive Beam components. It gives users capability to interact with existing |
| environment/session/context for Interactive Beam and visualize PCollections as |
| bounded datasets. At the same time, it hides the interactivity implementation |
| from users so that users can focus on developing Beam pipeline without worrying |
| about how hidden states in the interactive session are managed.</p> |
| <dl class="docutils"> |
| <dt>A convention to import this module:</dt> |
| <dd>from apache_beam.runners.interactive import interactive_beam as ib</dd> |
| </dl> |
| <p>Note: If you want backward-compatibility, only invoke interfaces provided by |
| this module in your notebook or application code.</p> |
| <dl class="class"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options"> |
| <em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Options</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Options"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.runners.interactive.options.interactive_options.html#apache_beam.runners.interactive.options.interactive_options.InteractiveOptions" title="apache_beam.runners.interactive.options.interactive_options.InteractiveOptions"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.runners.interactive.options.interactive_options.InteractiveOptions</span></code></a></p> |
| <p>Options that guide how Interactive Beam works.</p> |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay"> |
| <code class="descname">enable_recording_replay</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.enable_recording_replay" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Whether replayable source data recorded should be replayed for multiple |
| PCollection evaluations and pipeline runs as long as the data recorded is |
| still valid.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recordable_sources"> |
| <code class="descname">recordable_sources</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recordable_sources" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Interactive Beam automatically records data from sources in this set.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_duration"> |
| <code class="descname">recording_duration</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_duration" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The data recording of sources ends as soon as the background source |
| recording job has run for this long.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit"> |
| <code class="descname">recording_size_limit</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.recording_size_limit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The data recording of sources ends as soon as the size (in bytes) of data |
| recorded from recordable sources reaches the limit.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format"> |
| <code class="descname">display_timestamp_format</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timestamp_format" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The format in which timestamps are displayed.</p> |
| <p>Default is ‘%Y-%m-%d %H:%M:%S.%f%z’, e.g. 2020-02-01 15:05:06.000015-08:00.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.display_timezone"> |
| <code class="descname">display_timezone</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.display_timezone" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The timezone in which timestamps are displayed.</p> |
| <p>Defaults to local timezone.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Options.cache_root"> |
| <code class="descname">cache_root</code><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Options.cache_root" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The cache directory specified by the user.</p> |
| <p>Defaults to None.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings"> |
| <em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Recordings</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>An introspection interface for recordings for pipelines.</p> |
| <p>When a user materializes a PCollection onto disk (e.g., ib.show) for a streaming |
| pipeline, a background source recording job is started. This job pulls data |
| from all defined unbounded sources for that PCollection’s pipeline. The |
| following methods allow for introspection into that background recording job.</p> |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.describe"> |
| <code class="descname">describe</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.describe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.describe" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a description of all the recordings for the given pipeline.</p> |
| <p>If no pipeline is given then this returns a dictionary of descriptions for |
| all pipelines.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.clear"> |
| <code class="descname">clear</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.clear"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.clear" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears all recordings of the given pipeline. Returns True if cleared.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.stop"> |
| <code class="descname">stop</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.stop"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.stop" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stops the background source recording of the given pipeline.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Recordings.record"> |
| <code class="descname">record</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Recordings.record"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Recordings.record" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Starts a background source recording job for the given pipeline. Returns |
| True if the recording job was started.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters"> |
| <em class="property">class </em><code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">Clusters</code><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>An interface to control clusters implicitly created and managed by |
| the current interactive environment. This class is not needed and |
| should not be used otherwise.</p> |
| <p>Do not use it for clusters a user explicitly manages: e.g., if you have |
| a Flink cluster running somewhere and provide the flink master when |
| running a pipeline with the FlinkRunner, the cluster will not be tracked |
| or managed by Beam. |
| To reuse the same cluster for your pipelines, use the same pipeline |
| options: e.g., a pipeline option with the same flink master if you are |
| using FlinkRunner.</p> |
| <p>This module is experimental. No backwards-compatibility guarantees.</p> |
| <p>Interactive Beam automatically creates/reuses existing worker clusters to |
| execute pipelines when it detects the need from configurations. |
| Currently, the only supported cluster implementation is Flink running on |
| Cloud Dataproc.</p> |
| <p>To configure a pipeline to run on Cloud Dataproc with Flink, set the |
| underlying runner of the InteractiveRunner to FlinkRunner and the pipeline |
| options to indicate where on Cloud the FlinkRunner should be deployed to.</p> |
| <blockquote> |
| <div><p>An example to enable automatic Dataproc cluster creation/reuse:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">options</span> <span class="o">=</span> <span class="n">PipelineOptions</span><span class="p">([</span> |
| <span class="s1">'--project=my-project'</span><span class="p">,</span> |
| <span class="s1">'--region=my-region'</span><span class="p">,</span> |
| <span class="s1">'--environment_type=DOCKER'</span><span class="p">])</span> |
| <span class="n">pipeline</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="n">InteractiveRunner</span><span class="p">(</span> |
| <span class="n">underlying_runner</span><span class="o">=</span><span class="n">FlinkRunner</span><span class="p">()),</span> <span class="n">options</span><span class="o">=</span><span class="n">options</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| <p>Reusing the same pipeline options in another pipeline configures Interactive Beam |
| to reuse the same Dataproc cluster implicitly managed by the current |
| interactive environment. |
| If a flink_master is identified as a known cluster, the corresponding cluster |
| is also reused. |
| Furthermore, if a cluster is explicitly created by using a pipeline as an |
| identifier to a known cluster, the cluster is reused.</p> |
| <blockquote> |
| <div><p>An example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># If pipeline runs on a known cluster, below code reuses the cluster</span> |
| <span class="c1"># manager without creating a new one.</span> |
| <span class="n">dcm</span> <span class="o">=</span> <span class="n">ib</span><span class="o">.</span><span class="n">clusters</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">pipeline</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| <p>To provision the cluster, use WorkerOptions. Supported configurations are:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mf">1.</span> <span class="n">subnetwork</span> |
| <span class="mf">2.</span> <span class="n">num_workers</span> |
| <span class="mf">3.</span> <span class="n">machine_type</span> |
| </pre></div> |
| </div> |
| <p>To configure a pipeline to run on an existing FlinkRunner deployed elsewhere, |
| set the flink_master explicitly so no cluster will be created/reused.</p> |
| <blockquote> |
| <div><p>An example pipeline options to skip automatic Dataproc cluster usage:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">options</span> <span class="o">=</span> <span class="n">PipelineOptions</span><span class="p">([</span> |
| <span class="s1">'--flink_master=some.self.hosted.flink:port'</span><span class="p">,</span> |
| <span class="s1">'--environment_type=DOCKER'</span><span class="p">])</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| <p>To configure a pipeline to run on a local FlinkRunner, explicitly set the |
| default cluster metadata to None: ib.clusters.set_default_cluster(None).</p> |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.DATAPROC_FLINK_VERSION"> |
| <code class="descname">DATAPROC_FLINK_VERSION</code><em class="property"> = '1.12'</em><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.DATAPROC_FLINK_VERSION" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.DATAPROC_MINIMUM_WORKER_NUM"> |
| <code class="descname">DATAPROC_MINIMUM_WORKER_NUM</code><em class="property"> = 2</em><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.DATAPROC_MINIMUM_WORKER_NUM" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.create"> |
| <code class="descname">create</code><span class="sig-paren">(</span><em>cluster_identifier: Union[str, apache_beam.pipeline.Pipeline, apache_beam.runners.interactive.dataproc.types.ClusterMetadata]</em><span class="sig-paren">)</span> → apache_beam.runners.interactive.dataproc.dataproc_cluster_manager.DataprocClusterManager<a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters.create"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.create" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a Dataproc cluster manager provisioned for the cluster |
| identified. If the cluster is known, returns an existing cluster manager.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.cleanup"> |
| <code class="descname">cleanup</code><span class="sig-paren">(</span><em>cluster_identifier: Union[str</em>, <em>apache_beam.pipeline.Pipeline</em>, <em>apache_beam.runners.interactive.dataproc.types.ClusterMetadata</em>, <em>None] = None</em>, <em>force: bool = False</em><span class="sig-paren">)</span> → None<a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters.cleanup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.cleanup" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Cleans up the cluster associated with the given cluster_identifier.</p> |
| <p>When None cluster_identifier is provided: if force is True, cleans up for |
| all clusters; otherwise, do a dry run and NOOP. |
| If a beam.Pipeline is given as the ClusterIdentifier while multiple |
| pipelines share the same cluster, it only cleans up the association between |
| the pipeline and the cluster identified. |
| If the cluster_identifier is unknown, NOOP.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.describe"> |
| <code class="descname">describe</code><span class="sig-paren">(</span><em>cluster_identifier: Union[str</em>, <em>apache_beam.pipeline.Pipeline</em>, <em>apache_beam.runners.interactive.dataproc.types.ClusterMetadata</em>, <em>None] = None</em><span class="sig-paren">)</span> → Union[apache_beam.runners.interactive.dataproc.types.ClusterMetadata, List[apache_beam.runners.interactive.dataproc.types.ClusterMetadata]]<a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters.describe"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.describe" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Describes the ClusterMetadata by a ClusterIdentifier.</p> |
| <p>If no cluster_identifier is given or if the cluster_identifier is unknown, |
| it returns descriptions for all known clusters.</p> |
| <p>Example usage: |
| # Describe the cluster executing work for a pipeline. |
| ib.clusters.describe(pipeline) |
| # Describe the cluster with the flink master url. |
| ib.clusters.describe(master_url) |
| # Describe all existing clusters. |
| ib.clusters.describe()</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.set_default_cluster"> |
| <code class="descname">set_default_cluster</code><span class="sig-paren">(</span><em>cluster_identifier: Union[str</em>, <em>apache_beam.pipeline.Pipeline</em>, <em>apache_beam.runners.interactive.dataproc.types.ClusterMetadata</em>, <em>None] = None</em><span class="sig-paren">)</span> → None<a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters.set_default_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.set_default_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Temporarily sets the default metadata for creating or reusing a |
| DataprocClusterManager. It is always updated to the most recently created |
| cluster.</p> |
| <p>If no known ClusterMetadata can be identified by the ClusterIdentifier, NOOP. |
| If None is set, next time when Flink is in use, if no cluster is explicitly |
| configured by a pipeline, the job runs locally.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.Clusters.cluster_metadata"> |
| <code class="descname">cluster_metadata</code><span class="sig-paren">(</span><em>cluster_identifier: Union[str</em>, <em>apache_beam.pipeline.Pipeline</em>, <em>apache_beam.runners.interactive.dataproc.types.ClusterMetadata</em>, <em>None] = None</em><span class="sig-paren">)</span> → Optional[apache_beam.runners.interactive.dataproc.types.ClusterMetadata]<a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#Clusters.cluster_metadata"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.Clusters.cluster_metadata" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fetches the ClusterMetadata by a ClusterIdentifier that could be a |
| URL string, a Beam pipeline, or an equivalent to a known ClusterMetadata.</p> |
| <p>If the given cluster_identifier is a URL or a pipeline that is unknown to |
| the current environment, the default cluster metadata (could be None) is |
| returned. |
| If the given cluster_identifier is a ClusterMetadata but unknown to the |
| current environment, passes it through (NOOP).</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.watch"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">watch</code><span class="sig-paren">(</span><em>watchable</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#watch"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.watch" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Monitors a watchable.</p> |
| <p>This allows Interactive Beam to implicitly pass on the information about the |
| location of your pipeline definition.</p> |
| <p>Current implementation mainly watches for PCollection variables defined in |
| user code. A watchable can be a dictionary of variable metadata such as |
| locals(), a str name of a module, a module object or an instance of a class. |
| The variable can come from any scope even local variables in a method of a |
| class defined in a module.</p> |
| <blockquote> |
| <div><p>Below are all valid:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">watch</span><span class="p">(</span><span class="n">__main__</span><span class="p">)</span> <span class="c1"># if import __main__ is already invoked</span> |
| <span class="n">watch</span><span class="p">(</span><span class="s1">'__main__'</span><span class="p">)</span> <span class="c1"># does not require invoking import __main__ beforehand</span> |
| <span class="n">watch</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="c1"># inside a class</span> |
| <span class="n">watch</span><span class="p">(</span><span class="n">SomeInstance</span><span class="p">())</span> <span class="c1"># an instance of a class</span> |
| <span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span> <span class="c1"># inside a function, watching local variables within</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| <p>If you write a Beam pipeline in the __main__ module directly, since the |
| __main__ module is always watched, you don’t have to instruct Interactive |
| Beam. If your Beam pipeline is defined in some module other than __main__, |
| such as inside a class function or a unit test, you can watch() the scope.</p> |
| <blockquote> |
| <div><p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Foo</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">run_pipeline</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">with</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span> |
| <span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init Create'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span> |
| <span class="n">watch</span><span class="p">(</span><span class="nb">locals</span><span class="p">())</span> |
| <span class="k">return</span> <span class="n">init_pcoll</span> |
| <span class="n">init_pcoll</span> <span class="o">=</span> <span class="n">Foo</span><span class="p">()</span><span class="o">.</span><span class="n">run_pipeline</span><span class="p">()</span> |
| </pre></div> |
| </div> |
| <p>Interactive Beam caches init_pcoll for the first run.</p> |
| <p>Then you can use:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">show</span><span class="p">(</span><span class="n">init_pcoll</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>To visualize data from init_pcoll once the pipeline is executed.</p> |
| </div></blockquote> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.show"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">show</code><span class="sig-paren">(</span><em>*pcolls</em>, <em>include_window_info=False</em>, <em>visualize_data=False</em>, <em>n='inf'</em>, <em>duration='inf'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#show"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.show" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shows given PCollections in an interactive exploratory way if used within |
| a notebook, or prints the head of the sampled data if used within an ipython shell. |
| Noop if used in a non-interactive environment.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>include_window_info</strong> – (optional) if True, windowing information of the |
| data will be visualized too. Default is false.</li> |
| <li><strong>visualize_data</strong> – (optional) by default, the visualization contains data |
| tables rendering data from given pcolls separately as if they are |
| converted into dataframes. If visualize_data is True, there will be a |
| more dive-in widget and statistical overview widget of the data. |
| Otherwise, those 2 data visualization widgets will not be displayed.</li> |
| <li><strong>n</strong> – (optional) max number of elements to visualize. Default ‘inf’.</li> |
| <li><strong>duration</strong> – (optional) max duration of elements to read in integer seconds or |
| a string duration. Default ‘inf’.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>The given pcolls can be a dictionary of PCollections (as values), an iterable |
| of PCollections, or plain PCollection values.</p> |
| <p>The user can specify either the max number of elements with <cite>n</cite> to read |
| or the maximum duration of elements to read with <cite>duration</cite>. When a limiter is |
| not supplied, it is assumed to be infinite.</p> |
| <p>By default, the visualization contains data tables rendering data from given |
| pcolls separately as if they are converted into dataframes. If visualize_data |
| is True, there will be a more dive-in widget and statistical overview widget |
| of the data. Otherwise, those 2 data visualization widgets will not be |
| displayed.</p> |
| <p>Ad hoc builds a pipeline fragment including only transforms that are |
| necessary to produce data for given PCollections pcolls, runs the pipeline |
| fragment to compute data for those pcolls and then visualizes the data.</p> |
| <p>The function is always blocking. If used within a notebook, the data |
| visualized might be dynamically updated before the function returns as more |
| and more data could be processed and emitted while the pipeline fragment |
| is being executed. If used within an ipython shell, there will be no dynamic |
| plotting but a static plotting at the end of pipeline fragment execution.</p> |
| <p>The PCollections given must belong to the same pipeline.</p> |
| <blockquote> |
| <div><p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">p</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="n">InteractiveRunner</span><span class="p">())</span> |
| <span class="n">init</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">))</span> |
| <span class="n">square</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Square'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> |
| <span class="n">cube</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Cube'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">3</span><span class="p">)</span> |
| |
| <span class="c1"># Below builds a pipeline fragment from the defined pipeline `p` that</span> |
| <span class="c1"># contains only applied transforms of `Init` and `Square`. Then the</span> |
| <span class="c1"># interactive runner runs the pipeline fragment implicitly to compute data</span> |
| <span class="c1"># represented by PCollection `square` and visualizes it.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">square</span><span class="p">)</span> |
| |
| <span class="c1"># This is equivalent to `show(square)` because `square` depends on `init`</span> |
| <span class="c1"># and `init` is included in the pipeline fragment and computed anyway.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">init</span><span class="p">,</span> <span class="n">square</span><span class="p">)</span> |
| |
| <span class="c1"># Below is similar to running `p.run()`. It computes data for both</span> |
| <span class="c1"># PCollection `square` and PCollection `cube`, then visualizes them.</span> |
| <span class="n">show</span><span class="p">(</span><span class="n">square</span><span class="p">,</span> <span class="n">cube</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div></blockquote> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.collect"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">collect</code><span class="sig-paren">(</span><em>pcoll</em>, <em>n='inf'</em>, <em>duration='inf'</em>, <em>include_window_info=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#collect"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.collect" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Materializes the elements from a PCollection into a Dataframe.</p> |
| <p>This reads each element from file and reads only the amount that it needs |
| into memory. The user can specify either the max number of elements to read |
| or the maximum duration of elements to read. When a limiter is not supplied, |
| it is assumed to be infinite.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>n</strong> – (optional) max number of elements to read. Default ‘inf’.</li> |
| <li><strong>duration</strong> – (optional) max duration of elements to read in integer seconds or |
| a string duration. Default ‘inf’.</li> |
| <li><strong>include_window_info</strong> – (optional) if True, appends the windowing information |
| to each row. Default False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">p</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="n">InteractiveRunner</span><span class="p">())</span> |
| <span class="n">init</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'Init'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span> |
| <span class="n">square</span> <span class="o">=</span> <span class="n">init</span> <span class="o">|</span> <span class="s1">'Square'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> |
| |
| <span class="c1"># Run the pipeline and bring the PCollection into memory as a Dataframe.</span> |
| <span class="n">in_memory_square</span> <span class="o">=</span> <span class="n">collect</span><span class="p">(</span><span class="n">square</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.show_graph"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">show_graph</code><span class="sig-paren">(</span><em>pipeline</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#show_graph"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.show_graph" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shows the current pipeline shape of a given Beam pipeline as a DAG.</p> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="apache_beam.runners.interactive.interactive_beam.evict_recorded_data"> |
| <code class="descclassname">apache_beam.runners.interactive.interactive_beam.</code><code class="descname">evict_recorded_data</code><span class="sig-paren">(</span><em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/interactive_beam.html#evict_recorded_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.runners.interactive.interactive_beam.evict_recorded_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Forcefully evicts all recorded replayable data for the given pipeline. If |
| no pipeline is specified, evicts for all user defined pipelines.</p> |
| <p>Once invoked, Interactive Beam will record new data based on the guidance of |
| options the next time it evaluates/visualizes PCollections or runs pipelines.</p> |
| </dd></dl> |
| |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="apache_beam.runners.interactive.interactive_environment.html" class="btn btn-neutral float-right" title="apache_beam.runners.interactive.interactive_environment module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="apache_beam.runners.interactive.cache_manager.html" class="btn btn-neutral float-left" title="apache_beam.runners.interactive.cache_manager module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| © Copyright |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| |
| |
| |
| |
| |
| </body> |
| </html> |