<!-- blob: 2dc31e833f3014bf0fb3e3ecf24608e32a9d7fa5 [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<!-- Document metadata: character encoding, responsive viewport, and page title. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.ml.transforms.base module &mdash; Apache Beam 2.55.1 documentation</title>
<!-- Scripts served from the local _static directory. They are loaded synchronously
     in this order; theme.js comes last, after jQuery and the other helpers. -->
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<!-- MathJax 2.7.5 is fetched asynchronously from the cdnjs CDN (third-party origin). -->
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<!-- Stylesheets: theme layout and Pygments syntax highlighting. -->
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<!-- Relational links: general index, search page, and the next/previous pages. -->
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.ml.transforms.handlers module" href="apache_beam.ml.transforms.handlers.html" />
<link rel="prev" title="apache_beam.ml.transforms.embeddings.vertex_ai module" href="apache_beam.ml.transforms.embeddings.vertex_ai.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div class="version">
2.55.1
</div>
<div role="search">
<!-- Site search: submits the query (and two fixed hidden fields) via GET to search.html. -->
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<!-- A placeholder is not an accessible name, so the field is labelled explicitly
     for assistive technology; the visible appearance is unchanged. -->
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.ml.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.gcp.html">apache_beam.ml.gcp package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.inference.html">apache_beam.ml.inference package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.ml.transforms.html">apache_beam.ml.transforms package</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="apache_beam.ml.transforms.html#subpackages">Subpackages</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.ml.transforms.html#submodules">Submodules</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.ml.html">apache_beam.ml package</a> &raquo;</li>
<li><a href="apache_beam.ml.transforms.html">apache_beam.ml.transforms package</a> &raquo;</li>
<li>apache_beam.ml.transforms.base module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.ml.transforms.base.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.ml.transforms.base">
<span id="apache-beam-ml-transforms-base-module"></span><h1>apache_beam.ml.transforms.base module<a class="headerlink" href="#module-apache_beam.ml.transforms.base" title="Permalink to this headline"></a></h1>
<dl class="class">
<dt id="apache_beam.ml.transforms.base.MLTransformProvider">
<em class="property">class </em><code class="descclassname">apache_beam.ml.transforms.base.</code><code class="descname">MLTransformProvider</code><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransformProvider"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransformProvider" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.12)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>Data processing transforms that are intended to be used with MLTransform
should subclass MLTransformProvider and implement
get_ptransform_for_processing().</p>
<p>get_ptransform_for_processing() method should return a PTransform that can be
used to process the data.</p>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.MLTransformProvider.get_ptransform_for_processing">
<code class="descname">get_ptransform_for_processing</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span> &#x2192; apache_beam.transforms.ptransform.PTransform<a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransformProvider.get_ptransform_for_processing"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransformProvider.get_ptransform_for_processing" title="Permalink to this definition"></a></dt>
<dd><p>Returns a PTransform that can be used to process the data.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.MLTransformProvider.get_counter">
<code class="descname">get_counter</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransformProvider.get_counter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransformProvider.get_counter" title="Permalink to this definition"></a></dt>
<dd><p>Returns the counter name for the data processing transform.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.ml.transforms.base.BaseOperation">
<em class="property">class </em><code class="descclassname">apache_beam.ml.transforms.base.</code><code class="descname">BaseOperation</code><span class="sig-paren">(</span><em>columns: List[str]</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#BaseOperation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.BaseOperation" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Generic" title="(in Python v3.12)"><code class="xref py py-class docutils literal notranslate"><span class="pre">typing.Generic</span></code></a>, <a class="reference internal" href="#apache_beam.ml.transforms.base.MLTransformProvider" title="apache_beam.ml.transforms.base.MLTransformProvider"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.ml.transforms.base.MLTransformProvider</span></code></a>, <a class="reference external" href="https://docs.python.org/3/library/abc.html#abc.ABC" title="(in Python v3.12)"><code class="xref py py-class docutils literal notranslate"><span class="pre">abc.ABC</span></code></a></p>
<p>Base Operation class for data processing transformations.
:param columns: List of column names to apply the transformation.</p>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.BaseOperation.apply_transform">
<code class="descname">apply_transform</code><span class="sig-paren">(</span><em>data: OperationInputT</em>, <em>output_column_name: str</em><span class="sig-paren">)</span> &#x2192; Dict[str, OperationOutputT]<a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#BaseOperation.apply_transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.BaseOperation.apply_transform" title="Permalink to this definition"></a></dt>
<dd><p>Define any processing logic in the apply_transform() method.
The processing logic is applied to the input data and returns a transformed
output.
:param data: input data.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.ml.transforms.base.ProcessHandler">
<em class="property">class </em><code class="descclassname">apache_beam.ml.transforms.base.</code><code class="descname">ProcessHandler</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#ProcessHandler"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.ProcessHandler" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a>, <a class="reference external" href="https://docs.python.org/3/library/abc.html#abc.ABC" title="(in Python v3.12)"><code class="xref py py-class docutils literal notranslate"><span class="pre">abc.ABC</span></code></a></p>
<p>Only for internal use. No backwards compatibility guarantees.</p>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.ProcessHandler.append_transform">
<code class="descname">append_transform</code><span class="sig-paren">(</span><em>transform: apache_beam.ml.transforms.base.BaseOperation</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#ProcessHandler.append_transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.ProcessHandler.append_transform" title="Permalink to this definition"></a></dt>
<dd><p>Append transforms to the ProcessHandler.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.ml.transforms.base.EmbeddingsManager">
<em class="property">class </em><code class="descclassname">apache_beam.ml.transforms.base.</code><code class="descname">EmbeddingsManager</code><span class="sig-paren">(</span><em>columns: List[str], *, load_model_args: Optional[Dict[str, Any]] = None, min_batch_size: Optional[int] = None, max_batch_size: Optional[int] = None, large_model: bool = False, **kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#EmbeddingsManager"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.EmbeddingsManager" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#apache_beam.ml.transforms.base.MLTransformProvider" title="apache_beam.ml.transforms.base.MLTransformProvider"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.ml.transforms.base.MLTransformProvider</span></code></a></p>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.EmbeddingsManager.get_model_handler">
<code class="descname">get_model_handler</code><span class="sig-paren">(</span><span class="sig-paren">)</span> &#x2192; apache_beam.ml.inference.base.ModelHandler<a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#EmbeddingsManager.get_model_handler"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.EmbeddingsManager.get_model_handler" title="Permalink to this definition"></a></dt>
<dd><p>Return framework specific model handler.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.EmbeddingsManager.get_columns_to_apply">
<code class="descname">get_columns_to_apply</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#EmbeddingsManager.get_columns_to_apply"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.EmbeddingsManager.get_columns_to_apply" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.ml.transforms.base.MLTransform">
<em class="property">class </em><code class="descclassname">apache_beam.ml.transforms.base.</code><code class="descname">MLTransform</code><span class="sig-paren">(</span><em>*</em>, <em>write_artifact_location: Optional[str] = None</em>, <em>read_artifact_location: Optional[str] = None</em>, <em>transforms: Optional[List[apache_beam.ml.transforms.base.MLTransformProvider]] = None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransform" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a>, <a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Generic" title="(in Python v3.12)"><code class="xref py py-class docutils literal notranslate"><span class="pre">typing.Generic</span></code></a></p>
<p>MLTransform is a Beam PTransform that can be used to apply
transformations to the data. MLTransform is used to wrap the
data processing transforms provided by Apache Beam. MLTransform
works in two modes: write and read. In the write mode,
MLTransform will apply the transforms to the data and store the
artifacts in the write_artifact_location. In the read mode,
MLTransform will read the artifacts from the
read_artifact_location and apply the transforms to the data. The
artifact location should be a valid storage path where the artifacts
can be written to or read from.</p>
<p>Note that when consuming artifacts, it is not necessary to pass the
transforms since they are inherently stored within the artifacts
themselves.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>write_artifact_location</strong> – A storage location for artifacts resulting from
MLTransform. These artifacts include transformations applied to
the dataset and generated values like min, max from ScaleTo01,
and mean, var from ScaleToZScore. Artifacts are produced and written
to this location when using <cite>write_artifact_mode</cite>.
Later MLTransforms can reuse produced artifacts by setting
<cite>read_artifact_mode</cite> instead of <cite>write_artifact_mode</cite>. The value
assigned to <cite>write_artifact_location</cite> should be a valid storage
directory that the artifacts from this transform can be written to.
If no directory exists at this location, one will be created. This will
overwrite any artifacts already in this location, so distinct locations
should be used for each instance of MLTransform. Only one of
write_artifact_location and read_artifact_location should be specified.</li>
<li><strong>read_artifact_location</strong> – A storage location to read artifacts resulting
from a previous MLTransform. These artifacts include transformations
applied to the dataset and generated values like min, max from
ScaleTo01, and mean, var from ScaleToZScore. Note that when consuming
artifacts, it is not necessary to pass the transforms since they are
inherently stored within the artifacts themselves. The value assigned
to <cite>read_artifact_location</cite> should be a valid storage path where the
artifacts can be read from. Only one of write_artifact_location and
read_artifact_location should be specified.</li>
<li><strong>transforms</strong> – A list of transforms to apply to the data. All the transforms
are applied in the order they are specified. The input of the
i-th transform is the output of the (i-1)-th transform. Multi-input
transforms are not supported yet.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.MLTransform.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll: apache_beam.pvalue.PCollection[~ExampleT][ExampleT]</em><span class="sig-paren">)</span> &#x2192; Union[apache_beam.pvalue.PCollection[~MLTransformOutputT][MLTransformOutputT], Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT][MLTransformOutputT], apache_beam.pvalue.PCollection[apache_beam.pvalue.Row][apache_beam.pvalue.Row]]]<a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransform.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransform.expand" title="Permalink to this definition"></a></dt>
<dd><p>This is the entrypoint for the MLTransform. This method will
invoke the process_data() method of the ProcessHandler instance
to process the incoming data.</p>
<p>process_data takes in a PCollection and applies the PTransforms
necessary to process the data and returns a PCollection of
transformed data.
:param pcoll: A PCollection of ExampleT type.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">A PCollection of MLTransformOutputT type</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.MLTransform.with_transform">
<code class="descname">with_transform</code><span class="sig-paren">(</span><em>transform: apache_beam.ml.transforms.base.MLTransformProvider</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransform.with_transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransform.with_transform" title="Permalink to this definition"></a></dt>
<dd><p>Add a transform to the MLTransform pipeline.
:param transform: An MLTransformProvider instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">An MLTransform instance.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="apache_beam.ml.transforms.base.MLTransform.with_exception_handling">
<code class="descname">with_exception_handling</code><span class="sig-paren">(</span><em>*</em>, <em>exc_class=&lt;class 'Exception'&gt;</em>, <em>use_subprocess=False</em>, <em>threshold=1</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/transforms/base.html#MLTransform.with_exception_handling"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.ml.transforms.base.MLTransform.with_exception_handling" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="apache_beam.ml.transforms.handlers.html" class="btn btn-neutral float-right" title="apache_beam.ml.transforms.handlers module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="apache_beam.ml.transforms.embeddings.vertex_ai.html" class="btn btn-neutral float-left" title="apache_beam.ml.transforms.embeddings.vertex_ai module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Register a DOM-ready callback (jQuery(fn) shorthand) that switches on the
// Read the Docs theme navigation.
// NOTE(review): the meaning of the `true` argument is defined in theme.js,
// which is not visible here - confirm before changing it.
jQuery(function enableThemeNavigation() {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>