blob: b631251e35dd1389f2e216525c57d16a5462d27d [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>apache_beam.runners.interactive.background_caching_job module &mdash; Apache Beam 2.67.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=b86133f3" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=959b4fbe"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.runners.interactive.cache_manager module" href="apache_beam.runners.interactive.cache_manager.html" />
<link rel="prev" title="apache_beam.runners.interactive.augmented_pipeline module" href="apache_beam.runners.interactive.augmented_pipeline.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.runners.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dask.html">apache_beam.runners.dask package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.dataflow.html">apache_beam.runners.dataflow package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.direct.html">apache_beam.runners.direct package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="apache_beam.runners.interactive.html#subpackages">Subpackages</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.runners.interactive.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.runners.job.html">apache_beam.runners.job package</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="apache_beam.runners.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="breadcrumb-item"><a href="apache_beam.runners.interactive.html">apache_beam.runners.interactive package</a></li>
<li class="breadcrumb-item active">apache_beam.runners.interactive.background_caching_job module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.runners.interactive.background_caching_job.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-apache_beam.runners.interactive.background_caching_job">
<span id="apache-beam-runners-interactive-background-caching-job-module"></span><h1>apache_beam.runners.interactive.background_caching_job module<a class="headerlink" href="#module-apache_beam.runners.interactive.background_caching_job" title="Link to this heading"></a></h1>
<p>Module to build and run background source recording jobs.</p>
<p>For internal use only; no backwards-compatibility guarantees.</p>
<p>A background source recording job is a job that records events for all
recordable sources of a given pipeline. With Interactive Beam, one such job is
started when a pipeline run happens (which produces a main job in contrast to
the background source recording job) and meets the following conditions:</p>
<blockquote>
<div><ol class="arabic simple">
<li><p>The pipeline contains recordable sources, configured through
interactive_beam.options.recordable_sources.</p></li>
<li><p>No such background job is running.</p></li>
<li><p>No such background job has completed successfully and the cached events are
still valid (invalidated when recordable sources change in the pipeline).</p></li>
</ol>
</div></blockquote>
<p>Once started, the background source recording job runs asynchronously until it
hits some recording limit configured in interactive_beam.options. Meanwhile,
the main job and future main jobs from the pipeline will run using the
deterministic replayable recorded events until they are invalidated.</p>
<dl class="py class">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">BackgroundCachingJob</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pipeline_result</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">limiters</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#BackgroundCachingJob"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>A simple abstraction that controls necessary components of a timed and
space limited background source recording job.</p>
<p>A background source recording job successfully completes source data
recording in 2 conditions:</p>
<blockquote>
<div><ol class="arabic simple">
<li><p>The job is finite and runs into DONE state;</p></li>
<li><p>The job is infinite but hits an interactive_beam.options configured limit
and gets cancelled into CANCELLED/CANCELLING state.</p></li>
</ol>
</div></blockquote>
<p>In both situations, the background source recording job should be treated as
done successfully.</p>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.is_done">
<span class="sig-name descname"><span class="pre">is_done</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#BackgroundCachingJob.is_done"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.is_done" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.is_running">
<span class="sig-name descname"><span class="pre">is_running</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#BackgroundCachingJob.is_running"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.is_running" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.cancel">
<span class="sig-name descname"><span class="pre">cancel</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#BackgroundCachingJob.cancel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.cancel" title="Link to this definition"></a></dt>
<dd><p>Cancels this background source recording job.</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.state">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">state</span></span><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.BackgroundCachingJob.state" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.attempt_to_run_background_caching_job">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">attempt_to_run_background_caching_job</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">runner</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">options</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">limiters</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#attempt_to_run_background_caching_job"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.attempt_to_run_background_caching_job" title="Link to this definition"></a></dt>
<dd><p>Attempts to run a background source recording job for a user-defined
pipeline.</p>
<p>Returns True if a job was started, False otherwise.</p>
<p>The pipeline result is automatically tracked by Interactive Beam in case
future cancellation/cleanup is needed.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.is_background_caching_job_needed">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">is_background_caching_job_needed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#is_background_caching_job_needed"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.is_background_caching_job_needed" title="Link to this definition"></a></dt>
<dd><p>Determines if a background source recording job needs to be started.</p>
<p>It does several state checks and recording state changes throughout the
process. To keep usage simple, it is intentionally not idempotent.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.is_cache_complete">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">is_cache_complete</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pipeline_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a></span></span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#is_cache_complete"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.is_cache_complete" title="Link to this definition"></a></dt>
<dd><p>Returns True if the background cache for the given pipeline is done.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.has_source_to_cache">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">has_source_to_cache</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#has_source_to_cache"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.has_source_to_cache" title="Link to this definition"></a></dt>
<dd><p>Determines if a user-defined pipeline contains any source that needs to be
cached. If so, it also immediately wraps the current cache manager held by the
current interactive environment into a streaming cache if this has not been done.
The wrapping doesn’t invalidate the existing cache in any way.</p>
<p>This can help determine if a background source recording job is needed to
write cache for sources and if a test stream service is needed to serve the
cache.</p>
<p>Throughout the check, if source-to-cache has changed from the last check, it
also cleans up the invalidated cache early on.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.attempt_to_cancel_background_caching_job">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">attempt_to_cancel_background_caching_job</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#attempt_to_cancel_background_caching_job"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.attempt_to_cancel_background_caching_job" title="Link to this definition"></a></dt>
<dd><p>Attempts to cancel background source recording job for a user-defined
pipeline.</p>
<p>If no background source recording job needs to be cancelled, NOOP. Otherwise,
cancel such job.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.attempt_to_stop_test_stream_service">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">attempt_to_stop_test_stream_service</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#attempt_to_stop_test_stream_service"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.attempt_to_stop_test_stream_service" title="Link to this definition"></a></dt>
<dd><p>Attempts to stop the gRPC server/service serving the test stream.</p>
<p>If there is no such server started, NOOP. Otherwise, stop it.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.is_a_test_stream_service_running">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">is_a_test_stream_service_running</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#is_a_test_stream_service_running"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.is_a_test_stream_service_running" title="Link to this definition"></a></dt>
<dd><p>Checks to see if there is a gRPC server/service running that serves the
test stream to any job started from the given user_pipeline.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.is_source_to_cache_changed">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">is_source_to_cache_changed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">update_cached_source_signature</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#is_source_to_cache_changed"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.is_source_to_cache_changed" title="Link to this definition"></a></dt>
<dd><p>Determines if there is any change in the sources that need to be cached
used by the user-defined pipeline.</p>
<p>Due to the expensiveness of computations and for the simplicity of usage, this
function is not idempotent because Interactive Beam automatically discards
previously tracked signature of transforms and tracks the current signature of
transforms for the user-defined pipeline if there is any change.</p>
<p>When it’s True, there is addition/deletion/mutation of source transforms that
requires a new background source recording job.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.runners.interactive.background_caching_job.extract_source_to_cache_signature">
<span class="sig-prename descclassname"><span class="pre">apache_beam.runners.interactive.background_caching_job.</span></span><span class="sig-name descname"><span class="pre">extract_source_to_cache_signature</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user_pipeline</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/runners/interactive/background_caching_job.html#extract_source_to_cache_signature"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.runners.interactive.background_caching_job.extract_source_to_cache_signature" title="Link to this definition"></a></dt>
<dd><p>Extracts a set of signatures for sources that need to be cached in the
user-defined pipeline.</p>
<p>A signature is a str representation of urn and payload of a source.</p>
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="apache_beam.runners.interactive.augmented_pipeline.html" class="btn btn-neutral float-left" title="apache_beam.runners.interactive.augmented_pipeline module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="apache_beam.runners.interactive.cache_manager.html" class="btn btn-neutral float-right" title="apache_beam.runners.interactive.cache_manager module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright Apache Beam.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>