| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>apache_beam.transforms.ptransform — Apache Beam 2.38.0 documentation</title> |
| |
| <!-- Local scripts, kept in their original load order. --> |
| <script src="../../../_static/js/modernizr.min.js"></script> |
| <script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script> |
| <script src="../../../_static/jquery.js"></script> |
| <script src="../../../_static/underscore.js"></script> |
| <script src="../../../_static/doctools.js"></script> |
| <script src="../../../_static/language_data.js"></script> |
| <!-- MathJax is fetched asynchronously from the cdnjs CDN. --> |
| <script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| <script src="../../../_static/js/theme.js"></script> |
| |
| <!-- Stylesheets and document relations. --> |
| <link rel="stylesheet" href="../../../_static/css/theme.css"> |
| <link rel="stylesheet" href="../../../_static/pygments.css"> |
| <link rel="index" title="Index" href="../../../genindex.html"> |
| <link rel="search" title="Search" href="../../../search.html"> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="../../../index.html" class="icon icon-home"> Apache Beam |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 2.38.0 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get"> |
| <!-- A placeholder is not an accessible name, so the query field is labelled explicitly. --> |
| <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> |
| <!-- Hidden fields submitted with every query. --> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.coders.html">apache_beam.coders package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.dataframe.html">apache_beam.dataframe package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.io.html">apache_beam.io package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.metrics.html">apache_beam.metrics package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.ml.html">apache_beam.ml package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.options.html">apache_beam.options package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.portability.html">apache_beam.portability package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.runners.html">apache_beam.runners package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.transforms.html">apache_beam.transforms package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.typehints.html">apache_beam.typehints package</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.utils.html">apache_beam.utils package</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.error.html">apache_beam.error module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pipeline.html">apache_beam.pipeline module</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../apache_beam.pvalue.html">apache_beam.pvalue module</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../../../index.html">Apache Beam</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="../../../index.html">Docs</a> »</li> |
| |
| <li><a href="../../index.html">Module code</a> »</li> |
| |
| <li>apache_beam.transforms.ptransform</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <h1>Source code for apache_beam.transforms.ptransform</h1><div class="highlight"><pre> |
| <span></span><span class="c1">#</span> |
| <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> |
| <span class="c1"># contributor license agreements. See the NOTICE file distributed with</span> |
| <span class="c1"># this work for additional information regarding copyright ownership.</span> |
| <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> |
| <span class="c1"># (the "License"); you may not use this file except in compliance with</span> |
| <span class="c1"># the License. You may obtain a copy of the License at</span> |
| <span class="c1">#</span> |
| <span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="c1">#</span> |
| <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> |
| <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="c1"># See the License for the specific language governing permissions and</span> |
| <span class="c1"># limitations under the License.</span> |
| <span class="c1">#</span> |
| |
| <span class="sd">"""PTransform and descendants.</span> |
| |
| <span class="sd">A PTransform is an object describing (not executing) a computation. The actual</span> |
| <span class="sd">execution semantics for a transform is captured by a runner object. A transform</span> |
| <span class="sd">object always belongs to a pipeline object.</span> |
| |
| <span class="sd">A PTransform derived class needs to define the expand() method that describes</span> |
| <span class="sd">how one or more PValues are created by the transform.</span> |
| |
| <span class="sd">The module defines a few standard transforms: FlatMap (parallel do),</span> |
| <span class="sd">GroupByKey (group by key), etc. Note that the expand() methods for these</span> |
| <span class="sd">classes contain code that will add nodes to the processing graph associated</span> |
| <span class="sd">with a pipeline.</span> |
| |
| <span class="sd">As support for the FlatMap transform, the module also defines a DoFn</span> |
| <span class="sd">class and wrapper class that allows lambda functions to be used as</span> |
| <span class="sd">FlatMap processing functions.</span> |
| <span class="sd">"""</span> |
| |
| <span class="c1"># pytype: skip-file</span> |
| |
| <span class="kn">import</span> <span class="nn">copy</span> |
| <span class="kn">import</span> <span class="nn">itertools</span> |
| <span class="kn">import</span> <span class="nn">logging</span> |
| <span class="kn">import</span> <span class="nn">operator</span> |
| <span class="kn">import</span> <span class="nn">os</span> |
| <span class="kn">import</span> <span class="nn">sys</span> |
| <span class="kn">import</span> <span class="nn">threading</span> |
| <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">reduce</span> |
| <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">wraps</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Callable</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Dict</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Mapping</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Sequence</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Type</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TypeVar</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">overload</span> |
| |
| <span class="kn">from</span> <span class="nn">google.protobuf</span> <span class="kn">import</span> <span class="n">message</span> |
| |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">error</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">pvalue</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="kn">import</span> <span class="n">pickler</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.internal</span> <span class="kn">import</span> <span class="n">util</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.portability</span> <span class="kn">import</span> <span class="n">python_urns</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.pvalue</span> <span class="kn">import</span> <span class="n">DoOutputsTuple</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms</span> <span class="kn">import</span> <span class="n">resources</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.display</span> <span class="kn">import</span> <span class="n">DisplayDataItem</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.display</span> <span class="kn">import</span> <span class="n">HasDisplayData</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.sideinputs</span> <span class="kn">import</span> <span class="n">SIDE_INPUT_PREFIX</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints</span> <span class="kn">import</span> <span class="n">native_type_compatibility</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints</span> <span class="kn">import</span> <span class="n">typehints</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">IOTypeHints</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">TypeCheckError</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">WithTypeHints</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">get_signature</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">get_type_hints</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.decorators</span> <span class="kn">import</span> <span class="n">getcallargs_forhints</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.trivial_inference</span> <span class="kn">import</span> <span class="n">instance_to_type</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.typehints.typehints</span> <span class="kn">import</span> <span class="n">validate_composite_type_param</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.utils</span> <span class="kn">import</span> <span class="n">proto_utils</span> |
| |
| <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">coders</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.pipeline</span> <span class="kn">import</span> <span class="n">Pipeline</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.runners.pipeline_context</span> <span class="kn">import</span> <span class="n">PipelineContext</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="kn">import</span> <span class="n">Windowing</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.portability.api</span> <span class="kn">import</span> <span class="n">beam_runner_api_pb2</span> |
| |
| <span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="s1">'PTransform'</span><span class="p">,</span> |
| <span class="s1">'ptransform_fn'</span><span class="p">,</span> |
| <span class="s1">'label_from_callable'</span><span class="p">,</span> |
| <span class="p">]</span> |
| |
| <span class="n">_LOGGER</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span> |
| |
| <span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s1">'T'</span><span class="p">)</span> |
| <span class="n">PTransformT</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s1">'PTransformT'</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s1">'PTransform'</span><span class="p">)</span> |
| <span class="n">ConstructorFn</span> <span class="o">=</span> <span class="n">Callable</span><span class="p">[</span> |
| <span class="p">[</span><span class="s1">'beam_runner_api_pb2.PTransform'</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="s1">'PipelineContext'</span><span class="p">],</span> <span class="n">Any</span><span class="p">]</span> |
| <span class="n">ptransform_fn_typehints_enabled</span> <span class="o">=</span> <span class="kc">False</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_PValueishTransform</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""Visitor for PValueish objects.</span> |
| |
| <span class="sd"> A PValueish is a PValue, or list, tuple, dict of PValueish objects.</span> |
| |
| <span class="sd"> This visits a PValueish, constructing a (possibly mutated) copy.</span> |
| <span class="sd"> """</span> |
| <span class="k">def</span> <span class="nf">visit_nested</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">node</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="p">(</span><span class="nb">tuple</span><span class="p">,</span> <span class="nb">list</span><span class="p">)):</span> |
| <span class="n">args</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">node</span><span class="p">]</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="vm">__class__</span><span class="p">,</span> <span class="s1">'_make'</span><span class="p">):</span> |
| <span class="c1"># namedtuples require unpacked arguments in their constructor</span> |
| <span class="k">return</span> <span class="n">node</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">node</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">node</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span> |
| <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">node</span><span class="o">.</span><span class="n">items</span><span class="p">()})</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">node</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_SetInputPValues</span><span class="p">(</span><span class="n">_PValueishTransform</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">visit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">node</span><span class="p">,</span> <span class="n">replacements</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">id</span><span class="p">(</span><span class="n">node</span><span class="p">)</span> <span class="ow">in</span> <span class="n">replacements</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">replacements</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="n">node</span><span class="p">)]</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">visit_nested</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">replacements</span><span class="p">)</span> |
| |
| |
| <span class="c1"># Caches to allow for materialization of values when executing a pipeline</span> |
| <span class="c1"># in-process, in eager mode. This cache allows the same _MaterializedResult</span> |
| <span class="c1"># object to be accessed and used despite Runner API round-trip serialization.</span> |
| <span class="n">_pipeline_materialization_cache</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="p">}</span> <span class="c1"># type: Dict[Tuple[int, int], Dict[int, _MaterializedResult]]</span> |
| <span class="n">_pipeline_materialization_lock</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span> |
| |
| |
| <span class="k">def</span> <span class="nf">_allocate_materialized_pipeline</span><span class="p">(</span><span class="n">pipeline</span><span class="p">):</span> |
| <span class="c1"># type: (Pipeline) -> None</span> |
| <span class="n">pid</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getpid</span><span class="p">()</span> |
| <span class="k">with</span> <span class="n">_pipeline_materialization_lock</span><span class="p">:</span> |
| <span class="n">pipeline_id</span> <span class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">pipeline</span><span class="p">)</span> |
| <span class="n">_pipeline_materialization_cache</span><span class="p">[(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)]</span> <span class="o">=</span> <span class="p">{}</span> |
| |
| |
| <span class="k">def</span> <span class="nf">_allocate_materialized_result</span><span class="p">(</span><span class="n">pipeline</span><span class="p">):</span> |
| <span class="c1"># type: (Pipeline) -> _MaterializedResult</span> |
| <span class="n">pid</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getpid</span><span class="p">()</span> |
| <span class="k">with</span> <span class="n">_pipeline_materialization_lock</span><span class="p">:</span> |
| <span class="n">pipeline_id</span> <span class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">pipeline</span><span class="p">)</span> |
| <span class="k">if</span> <span class="p">(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_pipeline_materialization_cache</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'Materialized pipeline is not allocated for result '</span> |
| <span class="s1">'cache.'</span><span class="p">)</span> |
| <span class="n">result_id</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">_pipeline_materialization_cache</span><span class="p">[(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)])</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">_MaterializedResult</span><span class="p">(</span><span class="n">pipeline_id</span><span class="p">,</span> <span class="n">result_id</span><span class="p">)</span> |
| <span class="n">_pipeline_materialization_cache</span><span class="p">[(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)][</span><span class="n">result_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span> |
| <span class="k">return</span> <span class="n">result</span> |
| |
| |
| <span class="k">def</span> <span class="nf">_get_materialized_result</span><span class="p">(</span><span class="n">pipeline_id</span><span class="p">,</span> <span class="n">result_id</span><span class="p">):</span> |
| <span class="c1"># type: (int, int) -> _MaterializedResult</span> |
| <span class="n">pid</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getpid</span><span class="p">()</span> |
| <span class="k">with</span> <span class="n">_pipeline_materialization_lock</span><span class="p">:</span> |
| <span class="k">if</span> <span class="p">(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_pipeline_materialization_cache</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span> |
| <span class="s1">'Materialization in out-of-process and remote runners is not yet '</span> |
| <span class="s1">'supported.'</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">_pipeline_materialization_cache</span><span class="p">[(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)][</span><span class="n">result_id</span><span class="p">]</span> |
| |
| |
| <span class="k">def</span> <span class="nf">_release_materialized_pipeline</span><span class="p">(</span><span class="n">pipeline</span><span class="p">):</span> |
| <span class="c1"># type: (Pipeline) -> None</span> |
| <span class="n">pid</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getpid</span><span class="p">()</span> |
| <span class="k">with</span> <span class="n">_pipeline_materialization_lock</span><span class="p">:</span> |
| <span class="n">pipeline_id</span> <span class="o">=</span> <span class="nb">id</span><span class="p">(</span><span class="n">pipeline</span><span class="p">)</span> |
| <span class="k">del</span> <span class="n">_pipeline_materialization_cache</span><span class="p">[(</span><span class="n">pid</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">)]</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_MaterializedResult</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pipeline_id</span><span class="p">,</span> <span class="n">result_id</span><span class="p">):</span> |
| <span class="c1"># type: (int, int) -> None</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_pipeline_id</span> <span class="o">=</span> <span class="n">pipeline_id</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_result_id</span> <span class="o">=</span> <span class="n">result_id</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span> <span class="c1"># type: List[Any]</span> |
| |
| <span class="k">def</span> <span class="nf">__reduce__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="c1"># When unpickled (during Runner API roundtrip serialization), get the</span> |
| <span class="c1"># _MaterializedResult object from the cache so that values are written</span> |
| <span class="c1"># to the original _MaterializedResult when run in eager mode.</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">_get_materialized_result</span><span class="p">,</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_pipeline_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_result_id</span><span class="p">))</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_MaterializedDoOutputsTuple</span><span class="p">(</span><span class="n">pvalue</span><span class="o">.</span><span class="n">DoOutputsTuple</span><span class="p">):</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deferred</span><span class="p">,</span> <span class="n">results_by_tag</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">deferred</span><span class="o">.</span><span class="n">_tags</span><span class="p">,</span> <span class="n">deferred</span><span class="o">.</span><span class="n">_main_tag</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_deferred</span> <span class="o">=</span> <span class="n">deferred</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_results_by_tag</span> <span class="o">=</span> <span class="n">results_by_tag</span> |
| |
| <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">tag</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_results_by_tag</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span> |
| <span class="s1">'Tag </span><span class="si">%r</span><span class="s1"> is not a a defined output tag of </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_deferred</span><span class="p">))</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_results_by_tag</span><span class="p">[</span><span class="n">tag</span><span class="p">]</span><span class="o">.</span><span class="n">elements</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_AddMaterializationTransforms</span><span class="p">(</span><span class="n">_PValueishTransform</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">_materialize_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pipeline</span><span class="p">):</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">_allocate_materialized_result</span><span class="p">(</span><span class="n">pipeline</span><span class="p">)</span> |
| |
| <span class="c1"># Need to define _MaterializeValuesDoFn here to avoid circular</span> |
| <span class="c1"># dependencies.</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">DoFn</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">ParDo</span> |
| |
| <span class="k">class</span> <span class="nc">_MaterializeValuesDoFn</span><span class="p">(</span><span class="n">DoFn</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">process</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">):</span> |
| <span class="n">result</span><span class="o">.</span><span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span> |
| |
| <span class="n">materialization_label</span> <span class="o">=</span> <span class="s1">'_MaterializeValues</span><span class="si">%d</span><span class="s1">'</span> <span class="o">%</span> <span class="n">result</span><span class="o">.</span><span class="n">_result_id</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">materialization_label</span> <span class="o">>></span> <span class="n">ParDo</span><span class="p">(</span><span class="n">_MaterializeValuesDoFn</span><span class="p">()),</span> <span class="n">result</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">visit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">node</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PValue</span><span class="p">):</span> |
| <span class="n">transform</span><span class="p">,</span> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_materialize_transform</span><span class="p">(</span><span class="n">node</span><span class="o">.</span><span class="n">pipeline</span><span class="p">)</span> |
| <span class="n">node</span> <span class="o">|</span> <span class="n">transform</span> |
| <span class="k">return</span> <span class="n">result</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">DoOutputsTuple</span><span class="p">):</span> |
| <span class="n">results_by_tag</span> <span class="o">=</span> <span class="p">{}</span> |
| <span class="k">for</span> <span class="n">tag</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">([</span><span class="n">node</span><span class="o">.</span><span class="n">_main_tag</span><span class="p">],</span> <span class="n">node</span><span class="o">.</span><span class="n">_tags</span><span class="p">):</span> |
| <span class="n">results_by_tag</span><span class="p">[</span><span class="n">tag</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">node</span><span class="p">[</span><span class="n">tag</span><span class="p">])</span> |
| <span class="k">return</span> <span class="n">_MaterializedDoOutputsTuple</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">results_by_tag</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">visit_nested</span><span class="p">(</span><span class="n">node</span><span class="p">)</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_FinalizeMaterialization</span><span class="p">(</span><span class="n">_PValueishTransform</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">visit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">node</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">_MaterializedResult</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">node</span><span class="o">.</span><span class="n">elements</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="n">_MaterializedDoOutputsTuple</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">node</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">visit_nested</span><span class="p">(</span><span class="n">node</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span> <span class="nf">get_named_nested_pvalues</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">as_inputs</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span> |
| <span class="c1"># Check to see if it's a named tuple.</span> |
| <span class="n">fields</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="s1">'_fields'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">fields</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">fields</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">):</span> |
| <span class="n">tagged_values</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">tagged_values</span> <span class="o">=</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">as_inputs</span><span class="p">:</span> |
| <span class="c1"># Full list treated as a list of values for eager evaluation.</span> |
| <span class="k">yield</span> <span class="kc">None</span><span class="p">,</span> <span class="n">pvalueish</span> |
| <span class="k">return</span> |
| <span class="n">tagged_values</span> <span class="o">=</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> |
| <span class="n">tagged_values</span> <span class="o">=</span> <span class="n">pvalueish</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">as_inputs</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> |
| <span class="p">(</span><span class="n">pvalue</span><span class="o">.</span><span class="n">PValue</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">DoOutputsTuple</span><span class="p">)):</span> |
| <span class="k">yield</span> <span class="kc">None</span><span class="p">,</span> <span class="n">pvalueish</span> |
| <span class="k">return</span> |
| |
| <span class="k">for</span> <span class="n">tag</span><span class="p">,</span> <span class="n">subvalue</span> <span class="ow">in</span> <span class="n">tagged_values</span><span class="p">:</span> |
| <span class="k">for</span> <span class="n">subtag</span><span class="p">,</span> <span class="n">subsubvalue</span> <span class="ow">in</span> <span class="n">get_named_nested_pvalues</span><span class="p">(</span> |
| <span class="n">subvalue</span><span class="p">,</span> <span class="n">as_inputs</span><span class="o">=</span><span class="n">as_inputs</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">subtag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">yield</span> <span class="n">tag</span><span class="p">,</span> <span class="n">subsubvalue</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">yield</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">subtag</span><span class="p">),</span> <span class="n">subsubvalue</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_ZipPValues</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""Pairs each PValue in a pvalueish with a value in a parallel out sibling.</span> |
| |
| <span class="sd"> Sibling should have the same nested structure as pvalueish. Leaves in</span> |
| <span class="sd"> sibling are expanded across nested pvalueish lists, tuples, and dicts.</span> |
| <span class="sd"> For example</span> |
| |
| <span class="sd"> _ZipPValues().visit({'a': pc1, 'b': (pc2, pc3)},</span> |
| <span class="sd"> {'a': 'A', 'b': 'B'})</span> |
| |
| <span class="sd"> will return</span> |
| |
| <span class="sd"> [('a', pc1, 'A'), ('b', pc2, 'B'), ('b', pc3, 'B')]</span> |
| <span class="sd"> """</span> |
| <span class="k">def</span> <span class="nf">visit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">context</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">pairs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="n">pairs</span> <span class="o">=</span> <span class="p">[]</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">pairs</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="p">(</span><span class="n">pvalue</span><span class="o">.</span><span class="n">PValue</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">DoOutputsTuple</span><span class="p">)):</span> |
| <span class="n">pairs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">context</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">))</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit_sequence</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit_dict</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">visit_sequence</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sibling</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span> |
| <span class="k">for</span> <span class="n">ix</span><span class="p">,</span> <span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> |
| <span class="nb">list</span><span class="p">(</span><span class="n">sibling</span><span class="p">)</span> <span class="o">+</span> <span class="p">[</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">))):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">s</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="s1">'position </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">ix</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">pvalueish</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">visit_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sibling</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> |
| <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">pvalueish</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">sibling</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">),</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">pvalueish</span><span class="o">.</span><span class="n">values</span><span class="p">():</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">sibling</span><span class="p">,</span> <span class="n">pairs</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="PTransform"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform">[docs]</a><span class="k">class</span> <span class="nc">PTransform</span><span class="p">(</span><span class="n">WithTypeHints</span><span class="p">,</span> <span class="n">HasDisplayData</span><span class="p">):</span> |
| <span class="sd">"""A transform object used to modify one or more PCollections.</span> |
| |
| <span class="sd"> Subclasses must define an expand() method that will be used when the transform</span> |
| <span class="sd"> is applied to some arguments. Typical usage pattern will be:</span> |
| |
| <span class="sd"> input | CustomTransform(...)</span> |
| |
| <span class="sd"> The expand() method of the CustomTransform object passed in will be called</span> |
| <span class="sd"> with input as an argument.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># By default, transforms don't have any side inputs.</span> |
| <span class="n">side_inputs</span> <span class="o">=</span> <span class="p">()</span> <span class="c1"># type: Sequence[pvalue.AsSideInput]</span> |
| |
| <span class="c1"># Used for nullary transforms.</span> |
| <span class="n">pipeline</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: Optional[Pipeline]</span> |
| |
| <span class="c1"># Default is unset.</span> |
| <span class="n">_user_label</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: Optional[str]</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="c1"># type: (Optional[str]) -> None</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">label</span> <span class="o">=</span> <span class="n">label</span> <span class="c1"># type: ignore # https://github.com/python/mypy/issues/3004</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="c1"># type: () -> str</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_user_label</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_label</span><span class="p">()</span> |
| |
| <span class="nd">@label</span><span class="o">.</span><span class="n">setter</span> |
| <span class="k">def</span> <span class="nf">label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span> |
| <span class="c1"># type: (Optional[str]) -> None</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_user_label</span> <span class="o">=</span> <span class="n">value</span> |
| |
| <div class="viewcode-block" id="PTransform.default_label"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.default_label">[docs]</a> <span class="k">def</span> <span class="nf">default_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="c1"># type: () -> str</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span></div> |
| |
| <div class="viewcode-block" id="PTransform.annotations"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.annotations">[docs]</a> <span class="k">def</span> <span class="nf">annotations</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bytes</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="n">message</span><span class="o">.</span><span class="n">Message</span><span class="p">]]:</span> |
| <span class="k">return</span> <span class="p">{}</span></div> |
| |
| <div class="viewcode-block" id="PTransform.default_type_hints"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.default_type_hints">[docs]</a> <span class="k">def</span> <span class="nf">default_type_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="n">fn_type_hints</span> <span class="o">=</span> <span class="n">IOTypeHints</span><span class="o">.</span><span class="n">from_callable</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">expand</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">fn_type_hints</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="n">fn_type_hints</span> <span class="o">=</span> <span class="n">fn_type_hints</span><span class="o">.</span><span class="n">strip_pcoll</span><span class="p">()</span> |
| |
| <span class="c1"># Prefer class decorator type hints for backwards compatibility.</span> |
| <span class="k">return</span> <span class="n">get_type_hints</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span><span class="o">.</span><span class="n">with_defaults</span><span class="p">(</span><span class="n">fn_type_hints</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.with_input_types"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.with_input_types">[docs]</a> <span class="k">def</span> <span class="nf">with_input_types</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_type_hint</span><span class="p">):</span> |
| <span class="sd">"""Annotates the input type of a :class:`PTransform` with a type-hint.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> input_type_hint (type): An instance of an allowed built-in type, a custom</span> |
| <span class="sd"> class, or an instance of a</span> |
| <span class="sd"> :class:`~apache_beam.typehints.typehints.TypeConstraint`.</span> |
| |
| <span class="sd"> Raises:</span> |
| <span class="sd"> TypeError: If **input_type_hint** is not a valid type-hint.</span> |
| <span class="sd"> See</span> |
| <span class="sd"> :obj:`apache_beam.typehints.typehints.validate_composite_type_param()`</span> |
| <span class="sd"> for further details.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> PTransform: A reference to the instance of this particular</span> |
| <span class="sd"> :class:`PTransform` object. This allows chaining type-hinting related</span> |
| <span class="sd"> methods.</span> |
| <span class="sd"> """</span> |
| <span class="n">input_type_hint</span> <span class="o">=</span> <span class="n">native_type_compatibility</span><span class="o">.</span><span class="n">convert_to_beam_type</span><span class="p">(</span> |
| <span class="n">input_type_hint</span><span class="p">)</span> |
| <span class="n">validate_composite_type_param</span><span class="p">(</span> |
| <span class="n">input_type_hint</span><span class="p">,</span> <span class="s1">'Type hints for a PTransform'</span><span class="p">)</span> |
| <span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">with_input_types</span><span class="p">(</span><span class="n">input_type_hint</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.with_output_types"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.with_output_types">[docs]</a> <span class="k">def</span> <span class="nf">with_output_types</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">type_hint</span><span class="p">):</span> |
| <span class="sd">"""Annotates the output type of a :class:`PTransform` with a type-hint.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> type_hint (type): An instance of an allowed built-in type, a custom class,</span> |
| <span class="sd"> or a :class:`~apache_beam.typehints.typehints.TypeConstraint`.</span> |
| |
| <span class="sd"> Raises:</span> |
| <span class="sd"> TypeError: If **type_hint** is not a valid type-hint. See</span> |
| <span class="sd"> :obj:`~apache_beam.typehints.typehints.validate_composite_type_param()`</span> |
| <span class="sd"> for further details.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> PTransform: A reference to the instance of this particular</span> |
| <span class="sd"> :class:`PTransform` object. This allows chaining type-hinting related</span> |
| <span class="sd"> methods.</span> |
| <span class="sd"> """</span> |
| <span class="n">type_hint</span> <span class="o">=</span> <span class="n">native_type_compatibility</span><span class="o">.</span><span class="n">convert_to_beam_type</span><span class="p">(</span><span class="n">type_hint</span><span class="p">)</span> |
| <span class="n">validate_composite_type_param</span><span class="p">(</span><span class="n">type_hint</span><span class="p">,</span> <span class="s1">'Type hints for a PTransform'</span><span class="p">)</span> |
| <span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">with_output_types</span><span class="p">(</span><span class="n">type_hint</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.with_resource_hints"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.with_resource_hints">[docs]</a> <span class="k">def</span> <span class="nf">with_resource_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> <span class="c1"># type: (...) -> PTransform</span> |
| <span class="sd">"""Adds resource hints to the :class:`PTransform`.</span> |
| |
| <span class="sd"> Resource hints allow users to express constraints on the environment where</span> |
| <span class="sd"> the transform should be executed. Interpretation of the resource hints is</span> |
| <span class="sd"> defined by Beam Runners. Runners may ignore the unsupported hints.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> **kwargs: key-value pairs describing hints and their values.</span> |
| |
| <span class="sd"> Raises:</span> |
| <span class="sd"> ValueError: if provided hints are unknown to the SDK. See</span> |
| <span class="sd"> :mod:`apache_beam.transforms.resources` for a list of known hints.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> PTransform: A reference to the instance of this particular</span> |
| <span class="sd"> :class:`PTransform` object.</span> |
| <span class="sd"> """</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">get_resource_hints</span><span class="p">()</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">resources</span><span class="o">.</span><span class="n">parse_resource_hints</span><span class="p">(</span><span class="n">kwargs</span><span class="p">))</span> |
| <span class="k">return</span> <span class="bp">self</span></div> |
| |
| <div class="viewcode-block" id="PTransform.get_resource_hints"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.get_resource_hints">[docs]</a> <span class="k">def</span> <span class="nf">get_resource_hints</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="c1"># type: () -> Dict[str, bytes]</span> |
| <span class="k">if</span> <span class="s1">'_resource_hints'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">:</span> |
| <span class="c1"># PTransform subclasses don't always call super(), so prefer lazy</span> |
| <span class="c1"># initialization. By default, transforms don't have any resource hints.</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_resource_hints</span> <span class="o">=</span> <span class="p">{}</span> <span class="c1"># type: Dict[str, bytes]</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_resource_hints</span></div> |
| |
| <div class="viewcode-block" id="PTransform.type_check_inputs"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.type_check_inputs">[docs]</a> <span class="k">def</span> <span class="nf">type_check_inputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">type_check_inputs_or_outputs</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="s1">'input'</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.infer_output_type"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.infer_output_type">[docs]</a> <span class="k">def</span> <span class="nf">infer_output_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">unused_input_type</span><span class="p">):</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">()</span><span class="o">.</span><span class="n">simple_output_type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">)</span> <span class="ow">or</span> <span class="n">typehints</span><span class="o">.</span><span class="n">Any</span></div> |
| |
| <div class="viewcode-block" id="PTransform.type_check_outputs"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.type_check_outputs">[docs]</a> <span class="k">def</span> <span class="nf">type_check_outputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">type_check_inputs_or_outputs</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="s1">'output'</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.type_check_inputs_or_outputs"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.type_check_inputs_or_outputs">[docs]</a> <span class="k">def</span> <span class="nf">type_check_inputs_or_outputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">input_or_output</span><span class="p">):</span> |
| <span class="n">type_hints</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">()</span> |
| <span class="n">hints</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">type_hints</span><span class="p">,</span> <span class="n">input_or_output</span> <span class="o">+</span> <span class="s1">'_types'</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">hints</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">any</span><span class="p">(</span><span class="n">hints</span><span class="p">):</span> |
| <span class="k">return</span> |
| <span class="n">arg_hints</span><span class="p">,</span> <span class="n">kwarg_hints</span> <span class="o">=</span> <span class="n">hints</span> |
| <span class="k">if</span> <span class="n">arg_hints</span> <span class="ow">and</span> <span class="n">kwarg_hints</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">TypeCheckError</span><span class="p">(</span> |
| <span class="s1">'PTransform cannot have both positional and keyword type hints '</span> |
| <span class="s1">'without overriding </span><span class="si">%s</span><span class="s1">._type_check_</span><span class="si">%s</span><span class="s1">()'</span> <span class="o">%</span> |
| <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">,</span> <span class="n">input_or_output</span><span class="p">))</span> |
| <span class="n">root_hint</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">arg_hints</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">arg_hints</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="n">arg_hints</span> <span class="ow">or</span> <span class="n">kwarg_hints</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">context</span><span class="p">,</span> <span class="n">pvalue_</span><span class="p">,</span> <span class="n">hint</span> <span class="ow">in</span> <span class="n">_ZipPValues</span><span class="p">()</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">root_hint</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalue_</span><span class="p">,</span> <span class="n">DoOutputsTuple</span><span class="p">):</span> |
| <span class="k">continue</span> |
| <span class="k">if</span> <span class="n">pvalue_</span><span class="o">.</span><span class="n">element_type</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="c1"># TODO(robertwb): It's a bug that we ever get here. (typecheck)</span> |
| <span class="k">continue</span> |
| <span class="k">if</span> <span class="n">hint</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">typehints</span><span class="o">.</span><span class="n">is_consistent_with</span><span class="p">(</span><span class="n">pvalue_</span><span class="o">.</span><span class="n">element_type</span><span class="p">,</span> <span class="n">hint</span><span class="p">):</span> |
| <span class="n">at_context</span> <span class="o">=</span> <span class="s1">' </span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">input_or_output</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> <span class="k">if</span> <span class="n">context</span> <span class="k">else</span> <span class="s1">''</span> |
| <span class="k">raise</span> <span class="n">TypeCheckError</span><span class="p">(</span> |
| <span class="s1">'</span><span class="si">{type}</span><span class="s1"> type hint violation at </span><span class="si">{label}{context}</span><span class="s1">: expected </span><span class="si">{hint}</span><span class="s1">, '</span> |
| <span class="s1">'got </span><span class="si">{actual_type}</span><span class="se">\n</span><span class="s1">Full type hint:</span><span class="se">\n</span><span class="si">{debug_str}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> |
| <span class="nb">type</span><span class="o">=</span><span class="n">input_or_output</span><span class="o">.</span><span class="n">title</span><span class="p">(),</span> |
| <span class="n">label</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">,</span> |
| <span class="n">context</span><span class="o">=</span><span class="n">at_context</span><span class="p">,</span> |
| <span class="n">hint</span><span class="o">=</span><span class="n">hint</span><span class="p">,</span> |
| <span class="n">actual_type</span><span class="o">=</span><span class="n">pvalue_</span><span class="o">.</span><span class="n">element_type</span><span class="p">,</span> |
| <span class="n">debug_str</span><span class="o">=</span><span class="n">type_hints</span><span class="o">.</span><span class="n">debug_str</span><span class="p">()))</span></div> |
| |
| <span class="k">def</span> <span class="nf">_infer_output_coder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_type</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">input_coder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="c1"># type: (...) -> Optional[coders.Coder]</span> |
| |
| <span class="sd">"""Returns the output coder to use for output of this transform.</span> |
| |
| <span class="sd"> Note: this API is experimental and is subject to change; please do not rely</span> |
| <span class="sd"> on behavior induced by this method.</span> |
| |
| <span class="sd"> The Coder returned here should not be wrapped in a WindowedValueCoder</span> |
| <span class="sd"> wrapper.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> input_type: An instance of an allowed built-in type, a custom class, or a</span> |
| <span class="sd"> typehints.TypeConstraint for the input type, or None if not available.</span> |
| <span class="sd"> input_coder: Coder object for encoding input to this PTransform, or None</span> |
| <span class="sd"> if not available.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> Coder object for encoding output of this PTransform or None if unknown.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># TODO(ccy): further refine this API.</span> |
| <span class="k">return</span> <span class="kc">None</span> |
| |
| <span class="k">def</span> <span class="nf">_clone</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">new_label</span><span class="p">):</span> |
| <span class="sd">"""Clones the current transform instance under a new label."""</span> |
| <span class="n">transform</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> |
| <span class="n">transform</span><span class="o">.</span><span class="n">label</span> <span class="o">=</span> <span class="n">new_label</span> |
| <span class="k">return</span> <span class="n">transform</span> |
| |
| <div class="viewcode-block" id="PTransform.expand"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.expand">[docs]</a> <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_or_inputs</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span></div> |
| |
| <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">return</span> <span class="s1">'<</span><span class="si">%s</span><span class="s1">>'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_str_internal</span><span class="p">()</span> |
| |
| <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">return</span> <span class="s1">'<</span><span class="si">%s</span><span class="s1"> at </span><span class="si">%s</span><span class="s1">>'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_str_internal</span><span class="p">(),</span> <span class="nb">hex</span><span class="p">(</span><span class="nb">id</span><span class="p">(</span><span class="bp">self</span><span class="p">)))</span> |
| |
| <span class="k">def</span> <span class="nf">_str_internal</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">return</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">(PTransform)</span><span class="si">%s%s%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span> |
| <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> |
| <span class="s1">' label=[</span><span class="si">%s</span><span class="s1">]'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span> <span class="k">if</span> |
| <span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'label'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">)</span> <span class="k">else</span> <span class="s1">''</span><span class="p">,</span> |
| <span class="s1">' inputs=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">inputs</span><span class="p">)</span> <span class="k">if</span> |
| <span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'inputs'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">inputs</span><span class="p">)</span> <span class="k">else</span> <span class="s1">''</span><span class="p">,</span> |
| <span class="s1">' side_inputs=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">side_inputs</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">side_inputs</span> <span class="k">else</span> <span class="s1">''</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">_check_pcollection</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pcoll</span><span class="p">):</span> |
| <span class="c1"># type: (pvalue.PCollection) -> None</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pcoll</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PCollection</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">error</span><span class="o">.</span><span class="n">TransformError</span><span class="p">(</span><span class="s1">'Expecting a PCollection argument.'</span><span class="p">)</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">pcoll</span><span class="o">.</span><span class="n">pipeline</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">error</span><span class="o">.</span><span class="n">TransformError</span><span class="p">(</span><span class="s1">'PCollection not part of a pipeline.'</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="PTransform.get_windowing"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.get_windowing">[docs]</a> <span class="k">def</span> <span class="nf">get_windowing</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">inputs</span><span class="p">):</span> |
| <span class="c1"># type: (Any) -> Windowing</span> |
| |
| <span class="sd">"""Returns the window function to be associated with transform's output.</span> |
| |
| <span class="sd"> By default most transforms just return the windowing function associated</span> |
| <span class="sd"> with the input PCollection (or the first input if several).</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="n">inputs</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">inputs</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">windowing</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="kn">import</span> <span class="n">Windowing</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.window</span> <span class="kn">import</span> <span class="n">GlobalWindows</span> |
| <span class="c1"># TODO(robertwb): Return something compatible with every windowing?</span> |
| <span class="k">return</span> <span class="n">Windowing</span><span class="p">(</span><span class="n">GlobalWindows</span><span class="p">())</span></div> |
| |
| <span class="k">def</span> <span class="fm">__rrshift__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">_NamedPTransform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__or__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">right</span><span class="p">):</span> |
| <span class="sd">"""Used to compose PTransforms, e.g., ptransform1 | ptransform2."""</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">PTransform</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">_ChainedPTransform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">right</span><span class="p">)</span> |
| <span class="k">return</span> <span class="bp">NotImplemented</span> |
| |
| <span class="k">def</span> <span class="fm">__ror__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""Used to apply this PTransform to non-PValues, e.g., a tuple."""</span> |
| <span class="n">pvalueish</span><span class="p">,</span> <span class="n">pvalues</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_extract_input_pvalues</span><span class="p">(</span><span class="n">left</span><span class="p">)</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalues</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span> |
| <span class="n">pvalues</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">pvalues</span><span class="o">.</span><span class="n">values</span><span class="p">())</span> |
| <span class="n">pipelines</span> <span class="o">=</span> <span class="p">[</span><span class="n">v</span><span class="o">.</span><span class="n">pipeline</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">pvalues</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PValue</span><span class="p">)]</span> |
| <span class="k">if</span> <span class="n">pvalues</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">pipelines</span><span class="p">:</span> |
| <span class="n">deferred</span> <span class="o">=</span> <span class="kc">False</span> |
| <span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">pipeline</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.options.pipeline_options</span> <span class="kn">import</span> <span class="n">PipelineOptions</span> |
| <span class="c1"># pylint: enable=wrong-import-order, wrong-import-position</span> |
| <span class="n">p</span> <span class="o">=</span> <span class="n">pipeline</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">(</span><span class="s1">'DirectRunner'</span><span class="p">,</span> <span class="n">PipelineOptions</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">pipelines</span><span class="p">:</span> |
| <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pipeline</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="n">p</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pipeline</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'"</span><span class="si">%s</span><span class="s1">" requires a pipeline to be specified '</span> |
| <span class="s1">'as there are no deferred inputs.'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">p</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pipeline</span> <span class="ow">or</span> <span class="n">pipelines</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="k">for</span> <span class="n">pp</span> <span class="ow">in</span> <span class="n">pipelines</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">p</span> <span class="o">!=</span> <span class="n">pp</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'Mixing values in different pipelines is not allowed.'</span> |
| <span class="s1">'</span><span class="se">\n</span><span class="s1">{</span><span class="si">%r</span><span class="s1">} != {</span><span class="si">%r</span><span class="s1">}'</span> <span class="o">%</span> <span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">pp</span><span class="p">))</span> |
| <span class="n">deferred</span> <span class="o">=</span> <span class="ow">not</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">runner</span><span class="p">,</span> <span class="s1">'is_eager'</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> |
| <span class="c1"># pylint: disable=wrong-import-order, wrong-import-position</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.transforms.core</span> <span class="kn">import</span> <span class="n">Create</span> |
| <span class="c1"># pylint: enable=wrong-import-order, wrong-import-position</span> |
| <span class="n">replacements</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="nb">id</span><span class="p">(</span><span class="n">v</span><span class="p">):</span> <span class="n">p</span> <span class="o">|</span> <span class="s1">'CreatePInput</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">ix</span> <span class="o">>></span> <span class="n">Create</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">reshuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">ix</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pvalues</span><span class="p">)</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PValue</span><span class="p">)</span> <span class="ow">and</span> <span class="n">v</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <span class="p">}</span> |
| <span class="n">pvalueish</span> <span class="o">=</span> <span class="n">_SetInputPValues</span><span class="p">()</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">replacements</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">pipeline</span> <span class="o">=</span> <span class="n">p</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">deferred</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">result</span> |
| <span class="n">_allocate_materialized_pipeline</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> |
| <span class="n">materialized_result</span> <span class="o">=</span> <span class="n">_AddMaterializationTransforms</span><span class="p">()</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> |
| <span class="n">p</span><span class="o">.</span><span class="n">run</span><span class="p">()</span><span class="o">.</span><span class="n">wait_until_finish</span><span class="p">()</span> |
| <span class="n">_release_materialized_pipeline</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">_FinalizeMaterialization</span><span class="p">()</span><span class="o">.</span><span class="n">visit</span><span class="p">(</span><span class="n">materialized_result</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">_extract_input_pvalues</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">):</span> |
| <span class="sd">"""Extract all the pvalues contained in the input pvalueish.</span> |
| |
| <span class="sd"> Returns pvalueish as well as the flat inputs list as the input may have to</span> |
| <span class="sd"> be copied as inspection may be destructive.</span> |
| |
| <span class="sd"> By default, recursively extracts tuple components and dict values.</span> |
| |
| <span class="sd"> Generally only needs to be overridden for multi-input PTransforms.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># pylint: disable=wrong-import-order</span> |
| <span class="kn">from</span> <span class="nn">apache_beam</span> <span class="kn">import</span> <span class="n">pipeline</span> |
| <span class="c1"># pylint: enable=wrong-import-order</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="n">pipeline</span><span class="o">.</span><span class="n">Pipeline</span><span class="p">):</span> |
| <span class="n">pvalueish</span> <span class="o">=</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PBegin</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="p">{</span> |
| <span class="nb">str</span><span class="p">(</span><span class="n">tag</span><span class="p">):</span> <span class="n">value</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">get_named_nested_pvalues</span><span class="p">(</span> |
| <span class="n">pvalueish</span><span class="p">,</span> <span class="n">as_inputs</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="p">}</span> |
| |
| <span class="k">def</span> <span class="nf">_pvaluish_from_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_dict</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">input_dict</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> |
| <span class="k">return</span> <span class="nb">next</span><span class="p">(</span><span class="nb">iter</span><span class="p">(</span><span class="n">input_dict</span><span class="o">.</span><span class="n">values</span><span class="p">()))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">input_dict</span> |
| |
| <span class="k">def</span> <span class="nf">_named_inputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">main_inputs</span><span class="p">,</span> <span class="n">side_inputs</span><span class="p">):</span> |
| <span class="c1"># type: (Mapping[str, pvalue.PValue], Sequence[Any]) -> Dict[str, pvalue.PValue]</span> |
| |
| <span class="sd">"""Returns the dictionary of named inputs (including side inputs) as they</span> |
| <span class="sd"> should be named in the beam proto.</span> |
| <span class="sd"> """</span> |
| <span class="n">main_inputs</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="n">tag</span><span class="p">:</span> <span class="nb">input</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="nb">input</span><span class="p">)</span> <span class="ow">in</span> <span class="n">main_inputs</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PCollection</span><span class="p">)</span> |
| <span class="p">}</span> |
| <span class="n">named_side_inputs</span> <span class="o">=</span> <span class="p">{(</span><span class="n">SIDE_INPUT_PREFIX</span> <span class="o">+</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">'</span><span class="p">)</span> <span class="o">%</span> <span class="n">ix</span><span class="p">:</span> <span class="n">si</span><span class="o">.</span><span class="n">pvalue</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">ix</span><span class="p">,</span> <span class="n">si</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">side_inputs</span><span class="p">)}</span> |
| <span class="k">return</span> <span class="nb">dict</span><span class="p">(</span><span class="n">main_inputs</span><span class="p">,</span> <span class="o">**</span><span class="n">named_side_inputs</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">_named_outputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">outputs</span><span class="p">):</span> |
| <span class="c1"># type: (Dict[object, pvalue.PCollection]) -> Dict[str, pvalue.PCollection]</span> |
| |
| <span class="sd">"""Returns the dictionary of named outputs as they should be named in the</span> |
| <span class="sd"> beam proto.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># TODO(BEAM-1833): Push names up into the sdk construction.</span> |
| <span class="k">return</span> <span class="p">{</span> |
| <span class="nb">str</span><span class="p">(</span><span class="n">tag</span><span class="p">):</span> <span class="n">output</span> |
| <span class="k">for</span> <span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">output</span><span class="p">)</span> <span class="ow">in</span> <span class="n">outputs</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PCollection</span><span class="p">)</span> |
| <span class="p">}</span> |
| |
| <span class="n">_known_urns</span> <span class="o">=</span> <span class="p">{}</span> <span class="c1"># type: Dict[str, Tuple[Optional[type], ConstructorFn]]</span> |
| |
| <span class="nd">@classmethod</span> |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span> |
| <span class="bp">cls</span><span class="p">,</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="c1"># type: str</span> |
| <span class="n">parameter_type</span><span class="p">,</span> <span class="c1"># type: Type[T]</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> Callable[[Union[type, Callable[[beam_runner_api_pb2.PTransform, T, PipelineContext], Any]]], Callable[[T, PipelineContext], Any]]</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@classmethod</span> |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span> |
| <span class="bp">cls</span><span class="p">,</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="c1"># type: str</span> |
| <span class="n">parameter_type</span><span class="p">,</span> <span class="c1"># type: None</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> Callable[[Union[type, Callable[[beam_runner_api_pb2.PTransform, bytes, PipelineContext], Any]]], Callable[[bytes, PipelineContext], Any]]</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@classmethod</span> |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="c1"># type: str</span> |
| <span class="n">parameter_type</span><span class="p">,</span> <span class="c1"># type: Type[T]</span> |
| <span class="n">constructor</span> <span class="c1"># type: Callable[[beam_runner_api_pb2.PTransform, T, PipelineContext], Any]</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> None</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@classmethod</span> |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="c1"># type: str</span> |
| <span class="n">parameter_type</span><span class="p">,</span> <span class="c1"># type: None</span> |
| <span class="n">constructor</span> <span class="c1"># type: Callable[[beam_runner_api_pb2.PTransform, bytes, PipelineContext], Any]</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> None</span> |
| <span class="k">pass</span> |
| |
| <div class="viewcode-block" id="PTransform.register_urn"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.register_urn">[docs]</a> <span class="nd">@classmethod</span> |
| <span class="k">def</span> <span class="nf">register_urn</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">urn</span><span class="p">,</span> <span class="n">parameter_type</span><span class="p">,</span> <span class="n">constructor</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">register</span><span class="p">(</span><span class="n">constructor</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">constructor</span><span class="p">,</span> <span class="nb">type</span><span class="p">):</span> |
| <span class="n">constructor</span><span class="o">.</span><span class="n">from_runner_api_parameter</span> <span class="o">=</span> <span class="n">register</span><span class="p">(</span> |
| <span class="n">constructor</span><span class="o">.</span><span class="n">from_runner_api_parameter</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="bp">cls</span><span class="o">.</span><span class="n">_known_urns</span><span class="p">[</span><span class="n">urn</span><span class="p">]</span> <span class="o">=</span> <span class="n">parameter_type</span><span class="p">,</span> <span class="n">constructor</span> |
| <span class="k">return</span> <span class="n">constructor</span> |
| |
| <span class="k">if</span> <span class="n">constructor</span><span class="p">:</span> |
| <span class="c1"># Used as a statement.</span> |
| <span class="n">register</span><span class="p">(</span><span class="n">constructor</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="c1"># Used as a decorator.</span> |
| <span class="k">return</span> <span class="n">register</span></div> |
| |
| <div class="viewcode-block" id="PTransform.to_runner_api"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.to_runner_api">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="n">has_parts</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">extra_kwargs</span><span class="p">):</span> |
| <span class="c1"># type: (PipelineContext, bool, Any) -> beam_runner_api_pb2.FunctionSpec</span> |
| <span class="kn">from</span> <span class="nn">apache_beam.portability.api</span> <span class="kn">import</span> <span class="n">beam_runner_api_pb2</span> |
| <span class="c1"># typing: only ParDo supports extra_kwargs</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="n">typed_param</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_runner_api_parameter</span><span class="p">(</span><span class="n">context</span><span class="p">,</span> <span class="o">**</span><span class="n">extra_kwargs</span><span class="p">)</span> <span class="c1"># type: ignore[call-arg]</span> |
| <span class="k">if</span> <span class="n">urn</span> <span class="o">==</span> <span class="n">python_urns</span><span class="o">.</span><span class="n">GENERIC_COMPOSITE_TRANSFORM</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">has_parts</span><span class="p">:</span> |
| <span class="c1"># TODO(BEAM-3812): Remove this fallback.</span> |
| <span class="n">urn</span><span class="p">,</span> <span class="n">typed_param</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_runner_api_pickled</span><span class="p">(</span><span class="n">context</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">beam_runner_api_pb2</span><span class="o">.</span><span class="n">FunctionSpec</span><span class="p">(</span> |
| <span class="n">urn</span><span class="o">=</span><span class="n">urn</span><span class="p">,</span> |
| <span class="n">payload</span><span class="o">=</span><span class="n">typed_param</span><span class="o">.</span><span class="n">SerializeToString</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span> |
| <span class="n">typed_param</span><span class="p">,</span> <span class="n">message</span><span class="o">.</span><span class="n">Message</span><span class="p">)</span> <span class="k">else</span> <span class="n">typed_param</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">typed_param</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="n">typed_param</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.from_runner_api"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.from_runner_api">[docs]</a> <span class="nd">@classmethod</span> |
| <span class="k">def</span> <span class="nf">from_runner_api</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> |
| <span class="n">proto</span><span class="p">,</span> <span class="c1"># type: Optional[beam_runner_api_pb2.PTransform]</span> |
| <span class="n">context</span> <span class="c1"># type: PipelineContext</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> Optional[PTransform]</span> |
| <span class="k">if</span> <span class="n">proto</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">proto</span><span class="o">.</span><span class="n">spec</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">proto</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">urn</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">None</span> |
| <span class="n">parameter_type</span><span class="p">,</span> <span class="n">constructor</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_known_urns</span><span class="p">[</span><span class="n">proto</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">urn</span><span class="p">]</span> |
| |
| <span class="k">return</span> <span class="n">constructor</span><span class="p">(</span> |
| <span class="n">proto</span><span class="p">,</span> |
| <span class="n">proto_utils</span><span class="o">.</span><span class="n">parse_Bytes</span><span class="p">(</span><span class="n">proto</span><span class="o">.</span><span class="n">spec</span><span class="o">.</span><span class="n">payload</span><span class="p">,</span> <span class="n">parameter_type</span><span class="p">),</span> |
| <span class="n">context</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="PTransform.to_runner_api_parameter"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.to_runner_api_parameter">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api_parameter</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">unused_context</span> <span class="c1"># type: PipelineContext</span> |
| <span class="p">):</span> |
| <span class="c1"># type: (...) -> Tuple[str, Optional[Union[message.Message, bytes, str]]]</span> |
| <span class="c1"># The payload here is just to ease debugging.</span> |
| <span class="k">return</span> <span class="p">(</span> |
| <span class="n">python_urns</span><span class="o">.</span><span class="n">GENERIC_COMPOSITE_TRANSFORM</span><span class="p">,</span> |
| <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'_fn_api_payload'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="p">)))</span></div> |
| |
| <div class="viewcode-block" id="PTransform.to_runner_api_pickled"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.to_runner_api_pickled">[docs]</a> <span class="k">def</span> <span class="nf">to_runner_api_pickled</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">):</span> |
| <span class="c1"># type: (PipelineContext) -> Tuple[str, bytes]</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">python_urns</span><span class="o">.</span><span class="n">PICKLED_TRANSFORM</span><span class="p">,</span> <span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div> |
| |
| <div class="viewcode-block" id="PTransform.runner_api_requires_keyed_input"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform.runner_api_requires_keyed_input">[docs]</a> <span class="k">def</span> <span class="nf">runner_api_requires_keyed_input</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">return</span> <span class="kc">False</span></div> |
| |
| <span class="k">def</span> <span class="nf">_add_type_constraint_from_consumer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">full_label</span><span class="p">,</span> <span class="n">input_type_hints</span><span class="p">):</span> |
| <span class="c1"># type: (str, Tuple[str, Any]) -> None</span> |
| |
| <span class="sd">"""Adds a consumer transform's input type hints to our output type</span> |
| <span class="sd"> constraints, which is used during performance runtime type-checking.</span> |
| <span class="sd"> """</span> |
| <span class="k">pass</span></div> |
| |
| |
| <span class="nd">@PTransform</span><span class="o">.</span><span class="n">register_urn</span><span class="p">(</span><span class="n">python_urns</span><span class="o">.</span><span class="n">GENERIC_COMPOSITE_TRANSFORM</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">_create_transform</span><span class="p">(</span><span class="n">unused_ptransform</span><span class="p">,</span> <span class="n">payload</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">):</span> |
| <span class="n">empty_transform</span> <span class="o">=</span> <span class="n">PTransform</span><span class="p">()</span> |
| <span class="n">empty_transform</span><span class="o">.</span><span class="n">_fn_api_payload</span> <span class="o">=</span> <span class="n">payload</span> |
| <span class="k">return</span> <span class="n">empty_transform</span> |
| |
| |
| <span class="nd">@PTransform</span><span class="o">.</span><span class="n">register_urn</span><span class="p">(</span><span class="n">python_urns</span><span class="o">.</span><span class="n">PICKLED_TRANSFORM</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">_unpickle_transform</span><span class="p">(</span><span class="n">unused_ptransform</span><span class="p">,</span> <span class="n">pickled_bytes</span><span class="p">,</span> <span class="n">unused_context</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickled_bytes</span><span class="p">)</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_ChainedPTransform</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">parts</span><span class="p">):</span> |
| <span class="c1"># type: (*PTransform) -> None</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">label</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_chain_label</span><span class="p">(</span><span class="n">parts</span><span class="p">))</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_parts</span> <span class="o">=</span> <span class="n">parts</span> |
| |
| <span class="k">def</span> <span class="nf">_chain_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parts</span><span class="p">):</span> |
| <span class="k">return</span> <span class="s1">'|'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">label</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">parts</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__or__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">right</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="n">PTransform</span><span class="p">):</span> |
| <span class="c1"># Create a flat list rather than a nested tree of composite</span> |
| <span class="c1"># transforms for better monitoring, etc.</span> |
| <span class="k">return</span> <span class="n">_ChainedPTransform</span><span class="p">(</span><span class="o">*</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_parts</span> <span class="o">+</span> <span class="p">(</span><span class="n">right</span><span class="p">,</span> <span class="p">)))</span> |
| <span class="k">return</span> <span class="bp">NotImplemented</span> |
| |
| <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pval</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">reduce</span><span class="p">(</span><span class="n">operator</span><span class="o">.</span><span class="n">or_</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parts</span><span class="p">,</span> <span class="n">pval</span><span class="p">)</span> |
| |
| |
| <span class="k">class</span> <span class="nc">PTransformWithSideInputs</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span> |
| <span class="sd">"""A superclass for any :class:`PTransform` (e.g.</span> |
| <span class="sd"> :func:`~apache_beam.transforms.core.FlatMap` or</span> |
| <span class="sd"> :class:`~apache_beam.transforms.core.CombineFn`)</span> |
| <span class="sd"> invoking user code.</span> |
| |
| <span class="sd"> :class:`PTransform` s like :func:`~apache_beam.transforms.core.FlatMap`</span> |
| <span class="sd"> invoke user-supplied code in some kind of package (e.g. a</span> |
| <span class="sd"> :class:`~apache_beam.transforms.core.DoFn`) and optionally provide arguments</span> |
| <span class="sd"> and side inputs to that code. This internal-use-only class contains common</span> |
| <span class="sd"> functionality for :class:`PTransform` s that fit this model.</span> |
| <span class="sd"> """</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fn</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> |
| <span class="c1"># type: (WithTypeHints, *Any, **Any) -> None</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="nb">type</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="n">WithTypeHints</span><span class="p">):</span> |
| <span class="c1"># Don't treat Fn class objects as callables.</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Use </span><span class="si">%s</span><span class="s1">() not </span><span class="si">%s</span><span class="s1">.'</span> <span class="o">%</span> <span class="p">(</span><span class="n">fn</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">fn</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">make_fn</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="nb">bool</span><span class="p">(</span><span class="n">args</span> <span class="ow">or</span> <span class="n">kwargs</span><span class="p">))</span> |
| <span class="c1"># Now that we have figured out the label, initialize the super-class.</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| |
| <span class="k">if</span> <span class="p">(</span><span class="nb">any</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PCollection</span><span class="p">)</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">args</span><span class="p">)</span> <span class="ow">or</span> |
| <span class="nb">any</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">PCollection</span><span class="p">)</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">values</span><span class="p">())):</span> |
| <span class="k">raise</span> <span class="n">error</span><span class="o">.</span><span class="n">SideInputError</span><span class="p">(</span> |
| <span class="s1">'PCollection used directly as side input argument. Specify '</span> |
| <span class="s1">'AsIter(pcollection) or AsSingleton(pcollection) to indicate how the '</span> |
| <span class="s1">'PCollection is to be used.'</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">args</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">side_inputs</span> <span class="o">=</span> <span class="n">util</span><span class="o">.</span><span class="n">remove_objects_from_args</span><span class="p">(</span> |
| <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">AsSideInput</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">raw_side_inputs</span> <span class="o">=</span> <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span> |
| |
| <span class="c1"># Prevent name collisions with fns of the form '&lt;function &lt;lambda&gt; at ...&gt;'</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_cached_fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fn</span> |
| |
| <span class="c1"># Ensure fn and side inputs are picklable for remote execution.</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">fn</span> <span class="o">=</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fn</span><span class="p">))</span> |
| <span class="k">except</span> <span class="ne">RuntimeError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'Unable to pickle fn </span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fn</span><span class="p">,</span> <span class="n">e</span><span class="p">))</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">args</span> <span class="o">=</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">args</span><span class="p">))</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span> <span class="o">=</span> <span class="n">pickler</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">pickler</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span><span class="p">))</span> |
| |
| <span class="c1"># For type hints, because loads(dumps(class)) != class.</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cached_fn</span> |
| |
| <span class="k">def</span> <span class="nf">with_input_types</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">input_type_hint</span><span class="p">,</span> <span class="o">*</span><span class="n">side_inputs_arg_hints</span><span class="p">,</span> <span class="o">**</span><span class="n">side_input_kwarg_hints</span><span class="p">):</span> |
| <span class="sd">"""Annotates the types of main inputs and side inputs for the PTransform.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> input_type_hint: An instance of an allowed built-in type, a custom class,</span> |
| <span class="sd"> or an instance of a typehints.TypeConstraint.</span> |
| <span class="sd"> *side_inputs_arg_hints: A variable length argument composed of</span> |
| <span class="sd"> an allowed built-in type, a custom class, or a</span> |
| <span class="sd"> typehints.TypeConstraint.</span> |
| <span class="sd"> **side_input_kwarg_hints: A dictionary argument composed of</span> |
| <span class="sd"> an allowed built-in type, a custom class, or a</span> |
| <span class="sd"> typehints.TypeConstraint.</span> |
| |
| <span class="sd"> Example of annotating the types of side-inputs::</span> |
| |
| <span class="sd"> FlatMap().with_input_types(int, int, bool)</span> |
| |
| <span class="sd"> Raises:</span> |
| <span class="sd"> :class:`TypeError`: If **type_hint** is not a valid type-hint.</span> |
| <span class="sd"> See</span> |
| <span class="sd"> :func:`~apache_beam.typehints.typehints.validate_composite_type_param`</span> |
| <span class="sd"> for further details.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> :class:`PTransform`: A reference to the instance of this particular</span> |
| <span class="sd"> :class:`PTransform` object. This allows chaining type-hinting related</span> |
| <span class="sd"> methods.</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">with_input_types</span><span class="p">(</span><span class="n">input_type_hint</span><span class="p">)</span> |
| |
| <span class="n">side_inputs_arg_hints</span> <span class="o">=</span> <span class="n">native_type_compatibility</span><span class="o">.</span><span class="n">convert_to_beam_types</span><span class="p">(</span> |
| <span class="n">side_inputs_arg_hints</span><span class="p">)</span> |
| <span class="n">side_input_kwarg_hints</span> <span class="o">=</span> <span class="n">native_type_compatibility</span><span class="o">.</span><span class="n">convert_to_beam_types</span><span class="p">(</span> |
| <span class="n">side_input_kwarg_hints</span><span class="p">)</span> |
| |
| <span class="k">for</span> <span class="n">si</span> <span class="ow">in</span> <span class="n">side_inputs_arg_hints</span><span class="p">:</span> |
| <span class="n">validate_composite_type_param</span><span class="p">(</span><span class="n">si</span><span class="p">,</span> <span class="s1">'Type hints for a PTransform'</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">si</span> <span class="ow">in</span> <span class="n">side_input_kwarg_hints</span><span class="o">.</span><span class="n">values</span><span class="p">():</span> |
| <span class="n">validate_composite_type_param</span><span class="p">(</span><span class="n">si</span><span class="p">,</span> <span class="s1">'Type hints for a PTransform'</span><span class="p">)</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">side_inputs_types</span> <span class="o">=</span> <span class="n">side_inputs_arg_hints</span> |
| <span class="k">return</span> <span class="n">WithTypeHints</span><span class="o">.</span><span class="n">with_input_types</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">input_type_hint</span><span class="p">,</span> <span class="o">*</span><span class="n">side_inputs_arg_hints</span><span class="p">,</span> <span class="o">**</span><span class="n">side_input_kwarg_hints</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">type_check_inputs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">):</span> |
| <span class="n">type_hints</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">()</span> |
| <span class="n">input_types</span> <span class="o">=</span> <span class="n">type_hints</span><span class="o">.</span><span class="n">input_types</span> |
| <span class="k">if</span> <span class="n">input_types</span><span class="p">:</span> |
| <span class="n">args</span><span class="p">,</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">raw_side_inputs</span> |
| |
| <span class="k">def</span> <span class="nf">element_type</span><span class="p">(</span><span class="n">side_input</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">side_input</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">.</span><span class="n">AsSideInput</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">side_input</span><span class="o">.</span><span class="n">element_type</span> |
| <span class="k">return</span> <span class="n">instance_to_type</span><span class="p">(</span><span class="n">side_input</span><span class="p">)</span> |
| |
| <span class="n">arg_types</span> <span class="o">=</span> <span class="p">[</span><span class="n">pvalueish</span><span class="o">.</span><span class="n">element_type</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">element_type</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]</span> |
| <span class="n">kwargs_types</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">element_type</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span> |
| <span class="n">argspec_fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_process_argspec_fn</span><span class="p">()</span> |
| <span class="n">bindings</span> <span class="o">=</span> <span class="n">getcallargs_forhints</span><span class="p">(</span><span class="n">argspec_fn</span><span class="p">,</span> <span class="o">*</span><span class="n">arg_types</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs_types</span><span class="p">)</span> |
| <span class="n">hints</span> <span class="o">=</span> <span class="n">getcallargs_forhints</span><span class="p">(</span> |
| <span class="n">argspec_fn</span><span class="p">,</span> <span class="o">*</span><span class="n">input_types</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="o">**</span><span class="n">input_types</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> |
| <span class="k">for</span> <span class="n">arg</span><span class="p">,</span> <span class="n">hint</span> <span class="ow">in</span> <span class="n">hints</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> |
| <span class="k">if</span> <span class="n">arg</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'__unknown__'</span><span class="p">):</span> |
| <span class="k">continue</span> |
| <span class="k">if</span> <span class="n">hint</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">continue</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">typehints</span><span class="o">.</span><span class="n">is_consistent_with</span><span class="p">(</span><span class="n">bindings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">typehints</span><span class="o">.</span><span class="n">Any</span><span class="p">),</span> |
| <span class="n">hint</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">TypeCheckError</span><span class="p">(</span> |
| <span class="s1">'Type hint violation for </span><span class="se">\'</span><span class="si">{label}</span><span class="se">\'</span><span class="s1">: requires </span><span class="si">{hint}</span><span class="s1"> but got '</span> |
| <span class="s1">'</span><span class="si">{actual_type}</span><span class="s1"> for </span><span class="si">{arg}</span><span class="se">\n</span><span class="s1">Full type hint:</span><span class="se">\n</span><span class="si">{debug_str}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> |
| <span class="n">label</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">,</span> |
| <span class="n">hint</span><span class="o">=</span><span class="n">hint</span><span class="p">,</span> |
| <span class="n">actual_type</span><span class="o">=</span><span class="n">bindings</span><span class="p">[</span><span class="n">arg</span><span class="p">],</span> |
| <span class="n">arg</span><span class="o">=</span><span class="n">arg</span><span class="p">,</span> |
| <span class="n">debug_str</span><span class="o">=</span><span class="n">type_hints</span><span class="o">.</span><span class="n">debug_str</span><span class="p">()))</span> |
| |
| <span class="k">def</span> <span class="nf">_process_argspec_fn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="sd">"""Returns an argspec of the function actually consuming the data.</span> |
| <span class="sd"> """</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span> |
| |
| <span class="k">def</span> <span class="nf">make_fn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fn</span><span class="p">,</span> <span class="n">has_side_inputs</span><span class="p">):</span> |
| <span class="c1"># TODO(silviuc): Add comment describing that this is meant to be overridden</span> |
| <span class="c1"># by methods detecting callables and wrapping them in DoFns.</span> |
| <span class="k">return</span> <span class="n">fn</span> |
| |
| <span class="k">def</span> <span class="nf">default_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">return</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">(</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fn</span><span class="o">.</span><span class="n">default_label</span><span class="p">())</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_PTransformFnPTransform</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span> |
| <span class="sd">"""A class wrapper for a function-based transform."""</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fn</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_fn</span> <span class="o">=</span> <span class="n">fn</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_args</span> <span class="o">=</span> <span class="n">args</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_kwargs</span> <span class="o">=</span> <span class="n">kwargs</span> |
| |
| <span class="k">def</span> <span class="nf">display_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="n">res</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'fn'</span><span class="p">:</span> <span class="p">(</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="o">.</span><span class="vm">__name__</span> |
| <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="p">,</span> <span class="s1">'__name__'</span><span class="p">)</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="o">.</span><span class="vm">__class__</span><span class="p">),</span> |
| <span class="s1">'args'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_args</span><span class="p">))</span><span class="o">.</span><span class="n">drop_if_default</span><span class="p">(</span><span class="s1">'()'</span><span class="p">),</span> |
| <span class="s1">'kwargs'</span><span class="p">:</span> <span class="n">DisplayDataItem</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_kwargs</span><span class="p">))</span><span class="o">.</span><span class="n">drop_if_default</span><span class="p">(</span><span class="s1">'</span><span class="si">{}</span><span class="s1">'</span><span class="p">)</span> |
| <span class="p">}</span> |
| <span class="k">return</span> <span class="n">res</span> |
| |
| <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pcoll</span><span class="p">):</span> |
| <span class="c1"># Since the PTransform will be implemented entirely as a function</span> |
| <span class="c1"># (once called), we need to pass through any type-hinting information that</span> |
| <span class="c1"># may have been annotated via the .with_input_types() and</span> |
| <span class="c1"># .with_output_types() methods.</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_kwargs</span><span class="p">)</span> |
| <span class="n">args</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_args</span><span class="p">)</span> |
| |
| <span class="c1"># TODO(BEAM-5878) Support keyword-only arguments.</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="k">if</span> <span class="s1">'type_hints'</span> <span class="ow">in</span> <span class="n">get_signature</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="p">)</span><span class="o">.</span><span class="n">parameters</span><span class="p">:</span> |
| <span class="n">args</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">(),</span> <span class="p">)</span> <span class="o">+</span> <span class="n">args</span> |
| <span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span> |
| <span class="c1"># Might not be a function.</span> |
| <span class="k">pass</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="p">(</span><span class="n">pcoll</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">default_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_args</span><span class="p">:</span> |
| <span class="k">return</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">(</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span> |
| <span class="n">label_from_callable</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="p">),</span> <span class="n">label_from_callable</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_args</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> |
| <span class="k">return</span> <span class="n">label_from_callable</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_fn</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="ptransform_fn"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.ptransform_fn">[docs]</a><span class="k">def</span> <span class="nf">ptransform_fn</span><span class="p">(</span><span class="n">fn</span><span class="p">):</span> |
| <span class="c1"># type: (Callable) -> Callable[..., _PTransformFnPTransform]</span> |
| |
| <span class="sd">"""A decorator for a function-based PTransform.</span> |
| |
| <span class="sd"> Args:</span> |
| <span class="sd"> fn: A function implementing a custom PTransform.</span> |
| |
| <span class="sd"> Returns:</span> |
| <span class="sd"> A callable that, given the transform arguments, returns a PTransform wrapping fn.</span> |
| |
| <span class="sd"> This wrapper provides an alternative, simpler way to define a PTransform.</span> |
| <span class="sd"> The standard method is to subclass from PTransform and override the expand()</span> |
| <span class="sd"> method. An equivalent effect can be obtained by defining a function that</span> |
| <span class="sd"> accepts an input PCollection and additional optional arguments and returns a</span> |
| <span class="sd"> resulting PCollection. For example::</span> |
| |
| <span class="sd"> @ptransform_fn</span> |
| <span class="sd"> @beam.typehints.with_input_types(..)</span> |
| <span class="sd"> @beam.typehints.with_output_types(..)</span> |
| <span class="sd"> def CustomMapper(pcoll, mapfn):</span> |
| <span class="sd"> return pcoll | ParDo(mapfn)</span> |
| |
| <span class="sd"> The equivalent approach using PTransform subclassing::</span> |
| |
| <span class="sd"> @beam.typehints.with_input_types(..)</span> |
| <span class="sd"> @beam.typehints.with_output_types(..)</span> |
| <span class="sd"> class CustomMapper(PTransform):</span> |
| |
| <span class="sd"> def __init__(self, mapfn):</span> |
| <span class="sd"> super().__init__()</span> |
| <span class="sd"> self.mapfn = mapfn</span> |
| |
| <span class="sd"> def expand(self, pcoll):</span> |
| <span class="sd"> return pcoll | ParDo(self.mapfn)</span> |
| |
| <span class="sd"> With either method the custom PTransform can be used in pipelines as if</span> |
| <span class="sd"> it were one of the "native" PTransforms::</span> |
| |
| <span class="sd"> result_pcoll = input_pcoll | 'Label' >> CustomMapper(somefn)</span> |
| |
| <span class="sd"> Note that for both solutions the underlying implementation of the pipe</span> |
| <span class="sd"> operator (i.e., `|`) will inject the pcoll argument in its proper place</span> |
| <span class="sd"> (first argument if no label was specified and second argument otherwise).</span> |
| |
| <span class="sd"> Type hint support needs to be enabled via the</span> |
| <span class="sd"> --type_check_additional=ptransform_fn flag in Beam 2.</span> |
| <span class="sd"> If CustomMapper is a Cython function, you can still specify input and output</span> |
| <span class="sd"> types provided the decorators appear before @ptransform_fn.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># TODO(robertwb): Consider removing staticmethod to allow for self parameter.</span> |
| <span class="nd">@wraps</span><span class="p">(</span><span class="n">fn</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">callable_ptransform_factory</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> |
| <span class="n">res</span> <span class="o">=</span> <span class="n">_PTransformFnPTransform</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">ptransform_fn_typehints_enabled</span><span class="p">:</span> |
| <span class="c1"># Apply type hints applied before or after the ptransform_fn decorator,</span> |
| <span class="c1"># falling back on PTransform defaults.</span> |
| <span class="c1"># If the @with_{input,output}_types decorator comes before ptransform_fn,</span> |
| <span class="c1"># the type hints get applied to this function. If it comes after they will</span> |
| <span class="c1"># get applied to fn, and @wraps will copy the _type_hints attribute to</span> |
| <span class="c1"># this function.</span> |
| <span class="n">type_hints</span> <span class="o">=</span> <span class="n">get_type_hints</span><span class="p">(</span><span class="n">callable_ptransform_factory</span><span class="p">)</span> |
| <span class="n">res</span><span class="o">.</span><span class="n">_set_type_hints</span><span class="p">(</span><span class="n">type_hints</span><span class="o">.</span><span class="n">with_defaults</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">()))</span> |
| <span class="n">_LOGGER</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span> |
| <span class="s1">'type hints for </span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">res</span><span class="o">.</span><span class="n">default_label</span><span class="p">(),</span> <span class="n">res</span><span class="o">.</span><span class="n">get_type_hints</span><span class="p">())</span> |
| <span class="k">return</span> <span class="n">res</span> |
| |
| <span class="k">return</span> <span class="n">callable_ptransform_factory</span></div> |
| |
| |
| <div class="viewcode-block" id="label_from_callable"><a class="viewcode-back" href="../../../apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.label_from_callable">[docs]</a><span class="k">def</span> <span class="nf">label_from_callable</span><span class="p">(</span><span class="n">fn</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="s1">'default_label'</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">fn</span><span class="o">.</span><span class="n">default_label</span><span class="p">()</span> |
| <span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">fn</span><span class="p">,</span> <span class="s1">'__name__'</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">fn</span><span class="o">.</span><span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'<lambda>'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="s1">'<lambda at </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">>'</span> <span class="o">%</span> <span class="p">(</span> |
| <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">basename</span><span class="p">(</span><span class="n">fn</span><span class="o">.</span><span class="vm">__code__</span><span class="o">.</span><span class="n">co_filename</span><span class="p">),</span> <span class="n">fn</span><span class="o">.</span><span class="vm">__code__</span><span class="o">.</span><span class="n">co_firstlineno</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">fn</span><span class="o">.</span><span class="vm">__name__</span> |
| <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">fn</span><span class="p">)</span></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_NamedPTransform</span><span class="p">(</span><span class="n">PTransform</span><span class="p">):</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transform</span><span class="p">,</span> <span class="n">label</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">transform</span> <span class="o">=</span> <span class="n">transform</span> |
| |
| <span class="k">def</span> <span class="fm">__ror__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalueish</span><span class="p">,</span> <span class="n">_unused</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="o">.</span><span class="fm">__ror__</span><span class="p">(</span><span class="n">pvalueish</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">expand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pvalue</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Should never be expanded directly."</span><span class="p">)</span> |
| </pre></div> |
| |
| </div> |
| |
| </div> |
| <footer> |
| <!-- Page footer: copyright notice plus Sphinx / Read the Docs theme attribution. --> |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| © Copyright |
| |
| </p> |
| </div> |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| <script type="text/javascript"> |
| /* Runs once the DOM is ready (jQuery shorthand for a document-ready handler) and switches on the theme navigation. */ |
| /* NOTE(review): the meaning of the `true` argument is defined in theme.js, not here; presumably it enables the sticky sidebar - confirm. */ |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| |
| |
| |
| |
| |
| </body> |
| </html> |