

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>apache_beam.transforms.util module &mdash; Apache Beam  documentation</title>
  

  
  
  
  

  
  <script type="text/javascript" src="_static/js/modernizr.min.js"></script>
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
        <script type="text/javascript" src="_static/jquery.js"></script>
        <script type="text/javascript" src="_static/underscore.js"></script>
        <script type="text/javascript" src="_static/doctools.js"></script>
        <script type="text/javascript" src="_static/language_data.js"></script>
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
    
    <script type="text/javascript" src="_static/js/theme.js"></script>

    

  
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="apache_beam.transforms.window module" href="apache_beam.transforms.window.html" />
    <link rel="prev" title="apache_beam.transforms.userstate module" href="apache_beam.transforms.userstate.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="index.html" class="icon icon-home"> Apache Beam
          

          
          </a>

          
            
            
          

          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.transforms.html#submodules">Submodules</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.combinefn_lifecycle_pipeline.html">apache_beam.transforms.combinefn_lifecycle_pipeline module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.combiners.html">apache_beam.transforms.combiners module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.core.html">apache_beam.transforms.core module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.create_source.html">apache_beam.transforms.create_source module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.deduplicate.html">apache_beam.transforms.deduplicate module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.display.html">apache_beam.transforms.display module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.environments.html">apache_beam.transforms.environments module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external.html">apache_beam.transforms.external module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external_java.html">apache_beam.transforms.external_java module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.periodicsequence.html">apache_beam.transforms.periodicsequence module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.ptransform.html">apache_beam.transforms.ptransform module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.resources.html">apache_beam.transforms.resources module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.sideinputs.html">apache_beam.transforms.sideinputs module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.sql.html">apache_beam.transforms.sql module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.stats.html">apache_beam.transforms.stats module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.timeutil.html">apache_beam.transforms.timeutil module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.trigger.html">apache_beam.transforms.trigger module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.userstate.html">apache_beam.transforms.userstate module</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">apache_beam.transforms.util module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.window.html">apache_beam.transforms.window module</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">Apache Beam</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="index.html">Docs</a> &raquo;</li>
        
          <li><a href="apache_beam.transforms.html">apache_beam.transforms package</a> &raquo;</li>
        
      <li>apache_beam.transforms.util module</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
            <a href="_sources/apache_beam.transforms.util.rst.txt" rel="nofollow"> View page source</a>
          
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="module-apache_beam.transforms.util">
<span id="apache-beam-transforms-util-module"></span><h1>apache_beam.transforms.util module<a class="headerlink" href="#module-apache_beam.transforms.util" title="Permalink to this headline">¶</a></h1>
<p>Simple utility PTransforms.</p>
<dl class="class">
<dt id="apache_beam.transforms.util.CoGroupByKey">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">CoGroupByKey</code><span class="sig-paren">(</span><em>*</em>, <em>pipeline=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#CoGroupByKey"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.CoGroupByKey" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>Groups results across several PCollections by key.</p>
<p>Given an input dict of serializable keys (called “tags”) to 0 or more
PCollections of (key, value) tuples, it creates a single output PCollection
of (key, value) tuples whose keys are the unique input keys from all inputs,
and whose values are dicts mapping each tag to an iterable of whatever values
were under the key in the corresponding PCollection, in this manner:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">(</span><span class="s1">&#39;some key&#39;</span><span class="p">,</span> <span class="p">{</span><span class="s1">&#39;tag1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;value 1 under &quot;some key&quot; in pcoll1&#39;</span><span class="p">,</span>
                       <span class="s1">&#39;value 2 under &quot;some key&quot; in pcoll1&#39;</span><span class="p">,</span>
                       <span class="o">...</span><span class="p">],</span>
              <span class="s1">&#39;tag2&#39;</span><span class="p">:</span> <span class="o">...</span> <span class="p">,</span>
              <span class="o">...</span> <span class="p">})</span>
</pre></div>
</div>
<p>For example, given:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="s1">&#39;tag1&#39;</span><span class="p">:</span> <span class="n">pc1</span><span class="p">,</span> <span class="s1">&#39;tag2&#39;</span><span class="p">:</span> <span class="n">pc2</span><span class="p">,</span> <span class="mi">333</span><span class="p">:</span> <span class="n">pc3</span><span class="p">}</span>
</pre></div>
</div>
<p>where:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pc1</span> <span class="o">=</span> <span class="p">[(</span><span class="n">k1</span><span class="p">,</span> <span class="n">v1</span><span class="p">)]</span>
<span class="n">pc2</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">pc3</span> <span class="o">=</span> <span class="p">[(</span><span class="n">k1</span><span class="p">,</span> <span class="n">v31</span><span class="p">),</span> <span class="p">(</span><span class="n">k1</span><span class="p">,</span> <span class="n">v32</span><span class="p">),</span> <span class="p">(</span><span class="n">k2</span><span class="p">,</span> <span class="n">v33</span><span class="p">)]</span>
</pre></div>
</div>
<p>The output PCollection would be:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[(</span><span class="n">k1</span><span class="p">,</span> <span class="p">{</span><span class="s1">&#39;tag1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="n">v1</span><span class="p">],</span> <span class="s1">&#39;tag2&#39;</span><span class="p">:</span> <span class="p">[],</span> <span class="mi">333</span><span class="p">:</span> <span class="p">[</span><span class="n">v31</span><span class="p">,</span> <span class="n">v32</span><span class="p">]}),</span>
 <span class="p">(</span><span class="n">k2</span><span class="p">,</span> <span class="p">{</span><span class="s1">&#39;tag1&#39;</span><span class="p">:</span> <span class="p">[],</span> <span class="s1">&#39;tag2&#39;</span><span class="p">:</span> <span class="p">[],</span> <span class="mi">333</span><span class="p">:</span> <span class="p">[</span><span class="n">v33</span><span class="p">]})]</span>
</pre></div>
</div>
<p>CoGroupByKey also works for tuples, lists, or other flat iterables of
PCollections, in which case the values of the resulting PCollections
will be tuples whose nth value is the list of values from the nth
PCollection—conceptually, the “tags” are the indices into the input.
Thus, for this input:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">(</span><span class="n">pc1</span><span class="p">,</span> <span class="n">pc2</span><span class="p">,</span> <span class="n">pc3</span><span class="p">)</span>
</pre></div>
</div>
<p>the output would be:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[(</span><span class="n">k1</span><span class="p">,</span> <span class="p">([</span><span class="n">v1</span><span class="p">],</span> <span class="p">[],</span> <span class="p">[</span><span class="n">v31</span><span class="p">,</span> <span class="n">v32</span><span class="p">]),</span>
 <span class="p">(</span><span class="n">k2</span><span class="p">,</span> <span class="p">([],</span> <span class="p">[],</span> <span class="p">[</span><span class="n">v33</span><span class="p">]))]</span>
</pre></div>
</div>
<dl class="attribute">
<dt>
<code class="descname">**kwargs</code></dt>
<dd><p>Accepts a single named argument “pipeline”, which specifies the
pipeline that “owns” this PTransform. Ordinarily CoGroupByKey can obtain
this information from one of the input PCollections, but if there are none
(or if there’s a chance there may be none), this argument is the only way
to provide pipeline information, and should be considered mandatory.</p>
</dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.CoGroupByKey.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcolls</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#CoGroupByKey.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.CoGroupByKey.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.Keys">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Keys</code><span class="sig-paren">(</span><em>pcoll</em>, <em>label='Keys'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Keys" title="Permalink to this definition">¶</a></dt>
<dd><p>Produces a PCollection of first elements of 2-tuples in a PCollection.</p>
</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.Values">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Values</code><span class="sig-paren">(</span><em>pcoll</em>, <em>label='Values'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Values"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Values" title="Permalink to this definition">¶</a></dt>
<dd><p>Produces a PCollection of second elements of 2-tuples in a PCollection.</p>
</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.KvSwap">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">KvSwap</code><span class="sig-paren">(</span><em>pcoll</em>, <em>label='KvSwap'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#KvSwap"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.KvSwap" title="Permalink to this definition">¶</a></dt>
<dd><p>Produces a PCollection reversing 2-tuples in a PCollection.</p>
</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.Distinct">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Distinct</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Distinct"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Distinct" title="Permalink to this definition">¶</a></dt>
<dd><p>Produces a PCollection containing distinct elements of a PCollection.</p>
</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.RemoveDuplicates">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">RemoveDuplicates</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#RemoveDuplicates"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.RemoveDuplicates" title="Permalink to this definition">¶</a></dt>
<dd><p>Produces a PCollection containing distinct elements of a PCollection.</p>
</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.BatchElements">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">BatchElements</code><span class="sig-paren">(</span><em>min_batch_size=1</em>, <em>max_batch_size=10000</em>, <em>target_batch_overhead=0.05</em>, <em>target_batch_duration_secs=1</em>, <em>variance=0.25</em>, <em>clock=&lt;built-in function time&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#BatchElements"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.BatchElements" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>A Transform that batches elements for amortized processing.</p>
<p>This transform is designed to precede operations whose processing cost
is of the form</p>
<blockquote>
<div>time = fixed_cost + num_elements * per_element_cost</div></blockquote>
<p>where the per element cost is (often significantly) smaller than the fixed
cost and could be amortized over multiple elements.  It consumes a PCollection
of element type T and produces a PCollection of element type List[T].</p>
<p>This transform attempts to find the best batch size between the minimim
and maximum parameters by profiling the time taken by (fused) downstream
operations. For a fixed batch size, set the min and max to be equal.</p>
<p>Elements are batched per-window and batches emitted in the window
corresponding to its contents.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>min_batch_size</strong> – (optional) the smallest number of elements per batch</li>
<li><strong>max_batch_size</strong> – (optional) the largest number of elements per batch</li>
<li><strong>target_batch_overhead</strong> – (optional) a target for fixed_cost / time,
as used in the formula above</li>
<li><strong>target_batch_duration_secs</strong> – (optional) a target for total time per bundle,
in seconds</li>
<li><strong>variance</strong> – (optional) the permitted (relative) amount of deviation from the
(estimated) ideal batch size used to produce a wider base for
linear interpolation</li>
<li><strong>clock</strong> – (optional) an alternative to time.time for measuring the cost of
donwstream operations (mostly for testing)</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.transforms.util.BatchElements.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#BatchElements.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.BatchElements.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Reshuffle">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Reshuffle</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reshuffle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reshuffle" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform that returns a PCollection equivalent to its input,
but operationally provides some of the side effects of a GroupByKey,
in particular checkpointing, and preventing fusion of the surrounding
transforms.</p>
<p>Reshuffle adds a temporary random key to each element, performs a
ReshufflePerKey, and finally removes the temporary key.</p>
<p>Reshuffle is experimental. No backwards compatibility guarantees.</p>
<dl class="method">
<dt id="apache_beam.transforms.util.Reshuffle.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reshuffle.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reshuffle.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.Reshuffle.to_runner_api_parameter">
<code class="descname">to_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reshuffle.to_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reshuffle.to_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Reshuffle.from_runner_api_parameter">
<em class="property">static </em><code class="descname">from_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_ptransform</em>, <em>unused_parameter</em>, <em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reshuffle.from_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reshuffle.from_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="function">
<dt id="apache_beam.transforms.util.WithKeys">
<code class="descclassname">apache_beam.transforms.util.</code><code class="descname">WithKeys</code><span class="sig-paren">(</span><em>pcoll</em>, <em>k</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#WithKeys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.WithKeys" title="Permalink to this definition">¶</a></dt>
<dd><p>PTransform that takes a PCollection, and either a constant key or a
callable, and returns a PCollection of (K, V), where each of the values in
the input PCollection has been paired with either the constant key or a key
computed from the value.  The callable may optionally accept positional or
keyword arguments, which should be passed to WithKeys directly.  These may
be either SideInputs or static (non-PCollection) values, such as ints.</p>
</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.GroupIntoBatches">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">GroupIntoBatches</code><span class="sig-paren">(</span><em>batch_size</em>, <em>max_buffering_duration_secs=None</em>, <em>clock=&lt;built-in function time&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform that batches the input into desired batch size. Elements are
buffered until they are equal to batch size provided in the argument at which
point they are output to the output Pcollection.</p>
<p>Windows are preserved (batches will contain elements from the same window)</p>
<p>GroupIntoBatches is experimental. Its use case will depend on the runner if
it has support of States and Timers.</p>
<p>Create a new GroupIntoBatches.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>batch_size</strong> – (required) How many elements should be in a batch</li>
<li><strong>max_buffering_duration_secs</strong> – (optional) How long in seconds at most an
incomplete batch of elements is allowed to be buffered in the states.
The duration must be a positive second duration and should be given as
an int or float. Setting this parameter to zero effectively means no
buffering limit.</li>
<li><strong>clock</strong> – (optional) an alternative to time.time (mostly for testing)</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.transforms.util.GroupIntoBatches.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.GroupIntoBatches.to_runner_api_parameter">
<code class="descname">to_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.to_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.to_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.GroupIntoBatches.from_runner_api_parameter">
<em class="property">static </em><code class="descname">from_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_ptransform</em>, <em>proto</em>, <em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.from_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.from_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.GroupIntoBatches.WithShardedKey">
<em class="property">class </em><code class="descname">WithShardedKey</code><span class="sig-paren">(</span><em>batch_size</em>, <em>max_buffering_duration_secs=None</em>, <em>clock=&lt;built-in function time&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.WithShardedKey"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.WithShardedKey" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>A GroupIntoBatches transform that outputs batched elements associated
with sharded input keys.</p>
<p>By default, keys are sharded to such that the input elements with the same
key are spread to all available threads executing the transform. Runners may
override the default sharding to do a better load balancing during the
execution time.</p>
<p>Create a new GroupIntoBatches with sharded output.
See <code class="docutils literal notranslate"><span class="pre">GroupIntoBatches</span></code> transform for a description of input parameters.</p>
<dl class="method">
<dt id="apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.WithShardedKey.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.to_runner_api_parameter">
<code class="descname">to_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.WithShardedKey.to_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.to_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.from_runner_api_parameter">
<em class="property">static </em><code class="descname">from_runner_api_parameter</code><span class="sig-paren">(</span><em>unused_ptransform</em>, <em>proto</em>, <em>unused_context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#GroupIntoBatches.WithShardedKey.from_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.GroupIntoBatches.WithShardedKey.from_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.ToString">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">ToString</code><a class="reference internal" href="_modules/apache_beam/transforms/util.html#ToString"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.ToString" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>PTransform for converting a PCollection element, KV or PCollection Iterable
to string.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.util.ToString.Element">
<em class="property">static </em><code class="descname">Element</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#ToString.Element"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.ToString.Element" title="Permalink to this definition">¶</a></dt>
<dd><p>Transforms each element of the PCollection to a string.</p>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.ToString.Iterables">
<em class="property">static </em><code class="descname">Iterables</code><span class="sig-paren">(</span><em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#ToString.Iterables"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.ToString.Iterables" title="Permalink to this definition">¶</a></dt>
<dd><p>Transforms each item in the iterable of the input of PCollection to a
string. There is no trailing delimiter.</p>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.ToString.Kvs">
<em class="property">static </em><code class="descname">Kvs</code><span class="sig-paren">(</span><em>delimiter=None</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.transforms.util.ToString.Kvs" title="Permalink to this definition">¶</a></dt>
<dd><p>Transforms each item in the iterable of the input of PCollection to a
string. There is no trailing delimiter.</p>
</dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Reify">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Reify</code><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>PTransforms for converting between explicit and implicit form of various
Beam values.</p>
<dl class="class">
<dt id="apache_beam.transforms.util.Reify.Timestamp">
<em class="property">class </em><code class="descname">Timestamp</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Timestamp"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Timestamp" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform to wrap a value in a TimestampedValue with it’s
associated timestamp.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Reify.Timestamp.add_timestamp_info">
<em class="property">static </em><code class="descname">add_timestamp_info</code><span class="sig-paren">(</span><em>element</em>, <em>timestamp=TimestampParam</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Timestamp.add_timestamp_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Timestamp.add_timestamp_info" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.Reify.Timestamp.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Timestamp.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Timestamp.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Reify.Window">
<em class="property">class </em><code class="descname">Window</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Window"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Window" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform to convert an element in a PCollection into a tuple of
(element, timestamp, window), wrapped in a TimestampedValue with it’s
associated timestamp.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Reify.Window.add_window_info">
<em class="property">static </em><code class="descname">add_window_info</code><span class="sig-paren">(</span><em>element</em>, <em>timestamp=TimestampParam</em>, <em>window=WindowParam</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Window.add_window_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Window.add_window_info" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.Reify.Window.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.Window.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.Window.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Reify.TimestampInValue">
<em class="property">class </em><code class="descname">TimestampInValue</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.TimestampInValue"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.TimestampInValue" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform to wrap the Value in a KV pair in a TimestampedValue with
the element’s associated timestamp.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Reify.TimestampInValue.add_timestamp_info">
<em class="property">static </em><code class="descname">add_timestamp_info</code><span class="sig-paren">(</span><em>element</em>, <em>timestamp=TimestampParam</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.TimestampInValue.add_timestamp_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.TimestampInValue.add_timestamp_info" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.Reify.TimestampInValue.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.TimestampInValue.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.TimestampInValue.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Reify.WindowInValue">
<em class="property">class </em><code class="descname">WindowInValue</code><span class="sig-paren">(</span><em>label=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.WindowInValue"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.WindowInValue" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform to convert the Value in a KV pair into a tuple of
(value, timestamp, window), with the whole element being wrapped inside a
TimestampedValue.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Reify.WindowInValue.add_window_info">
<em class="property">static </em><code class="descname">add_window_info</code><span class="sig-paren">(</span><em>element</em>, <em>timestamp=TimestampParam</em>, <em>window=WindowParam</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.WindowInValue.add_window_info"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.WindowInValue.add_window_info" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="method">
<dt id="apache_beam.transforms.util.Reify.WindowInValue.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Reify.WindowInValue.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Reify.WindowInValue.expand" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

</dd></dl>

</dd></dl>

<dl class="class">
<dt id="apache_beam.transforms.util.Regex">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.util.</code><code class="descname">Regex</code><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex" title="Permalink to this definition">¶</a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>PTransform  to use Regular Expression to process the elements in a
PCollection.</p>
<dl class="attribute">
<dt id="apache_beam.transforms.util.Regex.ALL">
<code class="descname">ALL</code><em class="property"> = '__regex_all_groups'</em><a class="headerlink" href="#apache_beam.transforms.util.Regex.ALL" title="Permalink to this definition">¶</a></dt>
<dd></dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.matches">
<em class="property">static </em><code class="descname">matches</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>group=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.matches"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.matches" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches (group 0 by default) if zero or more characters at the
beginning of string match the regular expression. To match the entire
string, add “$” sign at the end of regex expression.</p>
<p>Group can be integer value or a string value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>group</strong> – (optional) name/number of the group, it can be integer or a string
value. Defaults to 0, meaning the entire matched string will be
returned.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.all_matches">
<em class="property">static </em><code class="descname">all_matches</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.all_matches"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.all_matches" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns all matches (groups) if zero or more characters at the beginning
of string match the regular expression.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>regex</strong> – the regular expression string or (re.compile) pattern.</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.matches_kv">
<em class="property">static </em><code class="descname">matches_kv</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>keyGroup</em>, <em>valueGroup=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.matches_kv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.matches_kv" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the KV pairs if the string matches the regular expression, deriving
the key &amp; value from the specified group of the regular expression.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>keyGroup</strong> – The Regex group to use as the key. Can be int or str.</li>
<li><strong>valueGroup</strong> – (optional) Regex group to use the value. Can be int or str.
The default value “0” returns entire matched string.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.find">
<em class="property">static </em><code class="descname">find</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>group=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.find"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.find" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches if a portion of the line matches the Regex. Returns
the entire group (group 0 by default). Group can be integer value or a
string value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>group</strong> – (optional) name of the group, it can be integer or a string value.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.find_all">
<em class="property">static </em><code class="descname">find_all</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>group=0</em>, <em>outputEmpty=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.find_all"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.find_all" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches if a portion of the line matches the Regex. By default,
list of group 0 will return with empty items. To get all groups, pass the
<cite>Regex.ALL</cite> flag in the <cite>group</cite> parameter which returns all the groups in
the tuple format.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>group</strong> – (optional) name of the group, it can be integer or a string value.</li>
<li><strong>outputEmpty</strong> – (optional) Should empty be output. True to output empties
and false if not.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.find_kv">
<em class="property">static </em><code class="descname">find_kv</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>keyGroup</em>, <em>valueGroup=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.find_kv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.find_kv" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches if a portion of the line matches the Regex. Returns the
specified groups as the key and value pair.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>keyGroup</strong> – The Regex group to use as the key. Can be int or str.</li>
<li><strong>valueGroup</strong> – (optional) Regex group to use the value. Can be int or str.
The default value “0” returns entire matched string.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.replace_all">
<em class="property">static </em><code class="descname">replace_all</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>replacement</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.replace_all"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.replace_all" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches if a portion of the line  matches the regex and
replaces all matches with the replacement string.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>replacement</strong> – the string to be substituted for each match.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.replace_first">
<em class="property">static </em><code class="descname">replace_first</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>replacement</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.replace_first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.replace_first" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the matches if a portion of the line matches the regex and replaces
the first match with the replacement string.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>replacement</strong> – the string to be substituted for each match.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

<dl class="staticmethod">
<dt id="apache_beam.transforms.util.Regex.split">
<em class="property">static </em><code class="descname">split</code><span class="sig-paren">(</span><em>pcoll</em>, <em>regex</em>, <em>outputEmpty=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/util.html#Regex.split"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.util.Regex.split" title="Permalink to this definition">¶</a></dt>
<dd><p>Returns the list string which was splitted on the basis of regular
expression. It will not output empty items (by defaults).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>regex</strong> – the regular expression string or (re.compile) pattern.</li>
<li><strong>outputEmpty</strong> – (optional) Should empty be output. True to output empties
and false if not.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>

</dd></dl>

</div>


           </div>
           
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="apache_beam.transforms.window.html" class="btn btn-neutral float-right" title="apache_beam.transforms.window module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="apache_beam.transforms.userstate.html" class="btn btn-neutral float-left" title="apache_beam.transforms.userstate module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
   

</body>
</html>