<!-- blob: d9584637ce4c180a917dbff87d04d35c22726975 [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.io.gcp.experimental.spannerio module &mdash; Apache Beam documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.io.gcp.big_query_query_to_table_pipeline module" href="apache_beam.io.gcp.big_query_query_to_table_pipeline.html" />
<link rel="prev" title="apache_beam.io.gcp.experimental package" href="apache_beam.io.gcp.experimental.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.io.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.aws.html">apache_beam.io.aws package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.azure.html">apache_beam.io.azure package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.external.html">apache_beam.io.external package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.flink.html">apache_beam.io.flink package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a><ul class="current">
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.io.gcp.html#subpackages">Subpackages</a></li>
<li class="toctree-l4"><a class="reference internal" href="apache_beam.io.gcp.html#submodules">Submodules</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="apache_beam.io.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.io.html">apache_beam.io package</a> &raquo;</li>
<li><a href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a> &raquo;</li>
<li><a href="apache_beam.io.gcp.experimental.html">apache_beam.io.gcp.experimental package</a> &raquo;</li>
<li>apache_beam.io.gcp.experimental.spannerio module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.io.gcp.experimental.spannerio.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.io.gcp.experimental.spannerio">
<span id="apache-beam-io-gcp-experimental-spannerio-module"></span><h1>apache_beam.io.gcp.experimental.spannerio module<a class="headerlink" href="#module-apache_beam.io.gcp.experimental.spannerio" title="Permalink to this headline"></a></h1>
<p>Google Cloud Spanner IO</p>
<p>Experimental; no backwards-compatibility guarantees.</p>
<p>This is an experimental module for reading and writing data from Google Cloud
Spanner. Visit: <a class="reference external" href="https://cloud.google.com/spanner">https://cloud.google.com/spanner</a> for more details.</p>
<p>Reading Data from Cloud Spanner.</p>
<p>To read from Cloud Spanner apply ReadFromSpanner transformation. It will
return a PCollection, where each element represents an individual row returned
from the read operation. Both Query and Read APIs are supported.</p>
<p>ReadFromSpanner relies on ReadOperation objects, which are exposed by the
SpannerIO API. A ReadOperation holds the immutable data needed to
execute batch and naive reads on Cloud Spanner. This is done for more
convenient programming.</p>
<p>ReadFromSpanner reads from Cloud Spanner when given either an ‘sql’ param
in the constructor or a ‘table’ name with ‘columns’ as a list. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">records</span> <span class="o">=</span> <span class="p">(</span><span class="n">pipeline</span>
<span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">sql</span><span class="o">=</span><span class="s1">&#39;Select * from users&#39;</span><span class="p">))</span>
<span class="n">records</span> <span class="o">=</span> <span class="p">(</span><span class="n">pipeline</span>
<span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">table</span><span class="o">=</span><span class="s1">&#39;users&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;id&#39;</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;email&#39;</span><span class="p">]))</span>
</pre></div>
</div>
<p>You can also perform multiple reads by providing a list of ReadOperations
to the ReadFromSpanner transform constructor. ReadOperation exposes two class
methods: use ‘query’ to perform SQL-based reads and ‘table’ to perform reads
by table name. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">read_operations</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">ReadOperation</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">&#39;customers&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span>
<span class="s1">&#39;email&#39;</span><span class="p">]),</span>
<span class="n">ReadOperation</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">&#39;vendors&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span>
<span class="s1">&#39;email&#39;</span><span class="p">]),</span>
<span class="p">]</span>
<span class="n">all_users</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">read_operations</span><span class="o">=</span><span class="n">read_operations</span><span class="p">)</span>
<span class="o">...</span><span class="n">OR</span><span class="o">...</span>
<span class="n">read_operations</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">ReadOperation</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">sql</span><span class="o">=</span><span class="s1">&#39;Select name, email from customers&#39;</span><span class="p">),</span>
<span class="n">ReadOperation</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
<span class="n">sql</span><span class="o">=</span><span class="s1">&#39;Select * from users where id &lt;= @user_id&#39;</span><span class="p">,</span>
<span class="n">params</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;user_id&#39;</span><span class="p">:</span> <span class="mi">100</span><span class="p">},</span>
<span class="n">param_types</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;user_id&#39;</span><span class="p">:</span> <span class="n">param_types</span><span class="o">.</span><span class="n">INT64</span><span class="p">}</span>
<span class="p">),</span>
<span class="p">]</span>
<span class="c1"># the `param_types` values come from `google.cloud.spanner.param_types`</span>
<span class="n">all_users</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">read_operations</span><span class="o">=</span><span class="n">read_operations</span><span class="p">)</span>
</pre></div>
</div>
<p>For more information, please review the docs on class ReadOperation.</p>
<p>Users can also provide the ReadOperations in the form of a PCollection via
the pipeline. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">users</span> <span class="o">=</span> <span class="p">(</span><span class="n">pipeline</span>
<span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span><span class="n">ReadOperation</span><span class="o">...</span><span class="p">])</span>
<span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">))</span>
</pre></div>
</div>
<p>Users may also create a Cloud Spanner transaction using the transform called
<cite>create_transaction</cite>, which is available in the SpannerIO API.</p>
<p>The transform is guaranteed to be executed on a consistent snapshot of data,
utilizing the power of read only transactions. Staleness of data can be
controlled by providing the <cite>read_timestamp</cite> or <cite>exact_staleness</cite> param values
in the constructor.</p>
<p>This transform requires the root of the pipeline (PBegin) and returns a PTransform
which is passed later to the <cite>ReadFromSpanner</cite> constructor. <cite>ReadFromSpanner</cite>
passes this transaction PTransform as a singleton side input to the
<cite>_NaiveSpannerReadDoFn</cite> containing ‘session_id’ and ‘transaction_id’.
For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">transaction</span> <span class="o">=</span> <span class="p">(</span><span class="n">pipeline</span> <span class="o">|</span> <span class="n">create_transaction</span><span class="p">(</span><span class="n">TEST_PROJECT_ID</span><span class="p">,</span>
<span class="n">TEST_INSTANCE_ID</span><span class="p">,</span>
<span class="n">DB_NAME</span><span class="p">))</span>
<span class="n">users</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">sql</span><span class="o">=</span><span class="s1">&#39;Select * from users&#39;</span><span class="p">,</span> <span class="n">transaction</span><span class="o">=</span><span class="n">transaction</span><span class="p">)</span>
<span class="n">tweets</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">ReadFromSpanner</span><span class="p">(</span><span class="n">PROJECT_ID</span><span class="p">,</span> <span class="n">INSTANCE_ID</span><span class="p">,</span> <span class="n">DB_NAME</span><span class="p">,</span>
<span class="n">sql</span><span class="o">=</span><span class="s1">&#39;Select * from tweets&#39;</span><span class="p">,</span> <span class="n">transaction</span><span class="o">=</span><span class="n">transaction</span><span class="p">)</span>
</pre></div>
</div>
<p>For further details of this transform, please review the docs on the
<a class="reference internal" href="#apache_beam.io.gcp.experimental.spannerio.create_transaction" title="apache_beam.io.gcp.experimental.spannerio.create_transaction"><code class="xref py py-meth docutils literal notranslate"><span class="pre">create_transaction()</span></code></a> method available in the SpannerIO API.</p>
<p>ReadFromSpanner takes this transform in the constructor and passes it to the
read pipeline as the singleton side input.</p>
<p>Writing Data to Cloud Spanner.</p>
<p>The WriteToSpanner transform writes to Cloud Spanner by executing a
collection of input rows (WriteMutation). The mutations are grouped into
batches for efficiency.</p>
<p>The WriteToSpanner transform relies on WriteMutation objects, which are exposed
by the SpannerIO API. WriteMutation has five static methods (insert, update,
insert_or_update, replace, delete). These methods return an instance of the
_Mutator object, which contains the mutation type and the Spanner Mutation
object. For more details, review the docs of the class SpannerIO.WriteMutation.
For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mutations</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">WriteMutation</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">&#39;user&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;email&#39;</span><span class="p">),</span>
<span class="n">values</span><span class="o">=</span><span class="p">[(</span><span class="s1">&#39;sara&#39;</span><span class="p">,</span> <span class="s1">&#39;sara@dev.com&#39;</span><span class="p">)])</span>
<span class="p">]</span>
<span class="n">_</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span>
<span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="n">mutations</span><span class="p">)</span>
<span class="o">|</span> <span class="n">WriteToSpanner</span><span class="p">(</span>
<span class="n">project_id</span><span class="o">=</span><span class="n">SPANNER_PROJECT_ID</span><span class="p">,</span>
<span class="n">instance_id</span><span class="o">=</span><span class="n">SPANNER_INSTANCE_ID</span><span class="p">,</span>
<span class="n">database_id</span><span class="o">=</span><span class="n">SPANNER_DATABASE_NAME</span><span class="p">)</span>
<span class="p">)</span>
</pre></div>
</div>
<p>You can also create a WriteMutation by calling its constructor. For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mutations</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">WriteMutation</span><span class="p">(</span><span class="n">insert</span><span class="o">=</span><span class="s1">&#39;users&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;email&#39;</span><span class="p">),</span>
<span class="n">values</span><span class="o">=</span><span class="p">[(</span><span class="s1">&#39;sara&#39;</span><span class="p">,</span> <span class="s1">&#39;sara@example.com&#39;</span><span class="p">)])</span>
<span class="p">]</span>
</pre></div>
</div>
<p>For more information, review the docs available on WriteMutation class.</p>
<p>The WriteToSpanner transform also takes three batching parameters (max_number_rows,
max_number_cells and max_batch_size_bytes). By default, max_number_rows is set
to 50 rows, max_number_cells is set to 500 cells and max_batch_size_bytes is
set to 1MB (1048576 bytes). These parameters are used to reduce the number of
transactions sent to Spanner by grouping the mutations into batches. Set
these parameter values to a smaller value or to zero to disable batching.
Unlike the Java connector, this connector does not create batches of
transactions sorted by table and primary key.</p>
<p>The WriteToSpanner transform starts with grouping into batches. The first step
in this process is to make the mutation groups of the WriteMutation
objects and then filter them into batchable and unbatchable mutation
groups. There are three batching parameters (max_number_cells, max_number_rows
&amp; max_batch_size_bytes). We calculate the mutation byte size from the method
available in the <cite>google.cloud.spanner_v1.proto.mutation_pb2.Mutation.ByteSize</cite>.
If the mutation rows, cells or byte size are larger than the value of any of the
batching parameters, it will be tagged as an “unbatchable” mutation. After
this, all the batchable mutations are merged into a single mutation group whose
size is not larger than “max_batch_size_bytes”. After this process, all the
mutation groups are processed together. If a Mutation references a table or
column that does not exist, it will cause an exception and fail the entire pipeline.</p>
<dl class="class">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadOperation">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">ReadOperation</code><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadOperation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadOperation" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#apache_beam.io.gcp.experimental.spannerio.ReadOperation" title="apache_beam.io.gcp.experimental.spannerio.ReadOperation"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.io.gcp.experimental.spannerio.ReadOperation</span></code></a></p>
<p>Encapsulates a spanner read operation.</p>
<p>Create new instance of ReadOperation(is_sql, is_table, read_operation, kwargs)</p>
<dl class="classmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadOperation.query">
<em class="property">classmethod </em><code class="descname">query</code><span class="sig-paren">(</span><em>sql</em>, <em>params=None</em>, <em>param_types=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadOperation.query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadOperation.query" title="Permalink to this definition"></a></dt>
<dd><p>A convenient method to construct ReadOperation from sql query.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> – SQL query statement</li>
<li><strong>params</strong> – (optional) values for parameter replacement. Keys must match the
names used in sql</li>
<li><strong>param_types</strong> – (optional) maps explicit types for one or more param values;
required if parameters are passed.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="classmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadOperation.table">
<em class="property">classmethod </em><code class="descname">table</code><span class="sig-paren">(</span><em>table</em>, <em>columns</em>, <em>index=''</em>, <em>keyset=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadOperation.table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadOperation.table" title="Permalink to this definition"></a></dt>
<dd><p>A convenient method to construct ReadOperation from table.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – name of the table from which to fetch data.</li>
<li><strong>columns</strong> – names of columns to be retrieved.</li>
<li><strong>index</strong> – (optional) name of index to use, rather than the table’s primary
key.</li>
<li><strong>keyset</strong> – (optional) <cite>KeySet</cite> keys / ranges identifying rows to be
retrieved.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="apache_beam.io.gcp.experimental.spannerio.create_transaction">
<code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">create_transaction</code><span class="sig-paren">(</span><em>pbegin</em>, <em>project_id</em>, <em>instance_id</em>, <em>database_id</em>, <em>credentials=None</em>, <em>pool=None</em>, <em>read_timestamp=None</em>, <em>exact_staleness=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#create_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.create_transaction" title="Permalink to this definition"></a></dt>
<dd><p>A PTransform method to create a batch transaction.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pbegin</strong> – Root of the pipeline</li>
<li><strong>project_id</strong> – Cloud spanner project id. Be sure to use the Project ID,
not the Project Number.</li>
<li><strong>instance_id</strong> – Cloud spanner instance id.</li>
<li><strong>database_id</strong> – Cloud spanner database id.</li>
<li><strong>credentials</strong> – (optional) The authorization credentials to attach to requests.
These credentials identify this application to the service.
If none are specified, the client will attempt to ascertain
the credentials from the environment.</li>
<li><strong>pool</strong> – (optional) session pool to be used by database. If not passed,
Spanner Cloud SDK uses the BurstyPool by default.
<cite>google.cloud.spanner.BurstyPool</cite>. Ref:
<a class="reference external" href="https://googleapis.dev/python/spanner/latest/database-api.html#google.cloud.spanner_v1.database.Database">https://googleapis.dev/python/spanner/latest/database-api.html#google.cloud.spanner_v1.database.Database</a></li>
<li><strong>read_timestamp</strong> – (optional) An instance of the <cite>datetime.datetime</cite> object to
execute all reads at the given timestamp.</li>
<li><strong>exact_staleness</strong> – (optional) An instance of the <cite>datetime.timedelta</cite>
object. These timestamp bounds execute reads at a user-specified
timestamp.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">ReadFromSpanner</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance_id</em>, <em>database_id</em>, <em>pool=None</em>, <em>read_timestamp=None</em>, <em>exact_staleness=None</em>, <em>credentials=None</em>, <em>sql=None</em>, <em>params=None</em>, <em>param_types=None</em>, <em>table=None</em>, <em>columns=None</em>, <em>index=''</em>, <em>keyset=None</em>, <em>read_operations=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadFromSpanner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>A PTransform to perform reads from cloud spanner.
ReadFromSpanner uses BatchAPI to perform all read operations.</p>
<p>A PTransform that uses Spanner Batch API to perform reads.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> – Cloud spanner project id. Be sure to use the Project ID,
not the Project Number.</li>
<li><strong>instance_id</strong> – Cloud spanner instance id.</li>
<li><strong>database_id</strong> – Cloud spanner database id.</li>
<li><strong>pool</strong> – (optional) session pool to be used by database. If not passed,
Spanner Cloud SDK uses the BurstyPool by default.
<cite>google.cloud.spanner.BurstyPool</cite>. Ref:
<a class="reference external" href="https://googleapis.dev/python/spanner/latest/database-api.html#google.cloud.spanner_v1.database.Database">https://googleapis.dev/python/spanner/latest/database-api.html#google.cloud.spanner_v1.database.Database</a></li>
<li><strong>read_timestamp</strong> – (optional) An instance of the <cite>datetime.datetime</cite> object
to execute all reads at the given timestamp. By default, set to <cite>None</cite>.</li>
<li><strong>exact_staleness</strong> – (optional) An instance of the <cite>datetime.timedelta</cite>
object. These timestamp bounds execute reads at a user-specified
timestamp. By default, set to <cite>None</cite>.</li>
<li><strong>credentials</strong> – (optional) The authorization credentials to attach to
requests. These credentials identify this application to the service.
If none are specified, the client will attempt to ascertain
the credentials from the environment. By default, set to <cite>None</cite>.</li>
<li><strong>sql</strong> – (optional) SQL query statement.</li>
<li><strong>params</strong> – (optional) Values for parameter replacement. Keys must match the
names used in sql. By default, set to <cite>None</cite>.</li>
<li><strong>param_types</strong> – (optional) maps explicit types for one or more param values;
required if params are passed. By default, set to <cite>None</cite>.</li>
<li><strong>table</strong> – (optional) Name of the table from which to fetch data. By
default, set to <cite>None</cite>.</li>
<li><strong>columns</strong> – (optional) List of names of columns to be retrieved; required if
the table is passed. By default, set to <cite>None</cite>.</li>
<li><strong>index</strong> – (optional) name of index to use, rather than the table’s primary
key. By default, set to an empty string (<cite>''</cite>).</li>
<li><strong>keyset</strong> – (optional) keys / ranges identifying rows to be retrieved. By
default, set to <cite>None</cite>.</li>
<li><strong>read_operations</strong> – (optional) List of the objects of <a class="reference internal" href="#apache_beam.io.gcp.experimental.spannerio.ReadOperation" title="apache_beam.io.gcp.experimental.spannerio.ReadOperation"><code class="xref py py-class docutils literal notranslate"><span class="pre">ReadOperation</span></code></a>
to perform read all. By default, set to <cite>None</cite>.</li>
<li><strong>transaction</strong> – (optional) The <a class="reference internal" href="#apache_beam.io.gcp.experimental.spannerio.create_transaction" title="apache_beam.io.gcp.experimental.spannerio.create_transaction"><code class="xref py py-meth docutils literal notranslate"><span class="pre">create_transaction()</span></code></a> PTransform to
use when performing a naive read on Cloud Spanner. By default, set to <cite>None</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pbegin</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadFromSpanner.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#ReadFromSpanner.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.ReadFromSpanner.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteToSpanner">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">WriteToSpanner</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance_id</em>, <em>database_id</em>, <em>pool=None</em>, <em>credentials=None</em>, <em>max_batch_size_bytes=1048576</em>, <em>max_number_rows=50</em>, <em>max_number_cells=500</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteToSpanner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteToSpanner" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>A PTransform to write onto Google Cloud Spanner.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> – Cloud spanner project id. Be sure to use the Project ID,
not the Project Number.</li>
<li><strong>instance_id</strong> – Cloud spanner instance id.</li>
<li><strong>database_id</strong> – Cloud spanner database id.</li>
<li><strong>max_batch_size_bytes</strong> – (optional) Split the mutations into batches to
reduce the number of transactions sent to Spanner. By default it is
set to 1 MB (1048576 Bytes).</li>
<li><strong>max_number_rows</strong> – (optional) Split the mutations into batches to
reduce the number of transactions sent to Spanner. By default it is
set to 50 rows per batch.</li>
<li><strong>max_number_cells</strong> – (optional) Split the mutations into batches to
reduce the number of transactions sent to Spanner. By default it is
set to 500 cells per batch.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteToSpanner.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteToSpanner.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteToSpanner.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteToSpanner.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteToSpanner.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteToSpanner.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.experimental.spannerio.MutationGroup">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">MutationGroup</code><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#MutationGroup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.MutationGroup" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/collections.html#collections.deque" title="(in Python v3.9)"><code class="xref py py-class docutils literal notranslate"><span class="pre">collections.deque</span></code></a></p>
<p>A Bundle of Spanner Mutations (_Mutator).</p>
<dl class="attribute">
<dt id="apache_beam.io.gcp.experimental.spannerio.MutationGroup.info">
<code class="descname">info</code><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.MutationGroup.info" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.experimental.spannerio.MutationGroup.primary">
<code class="descname">primary</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#MutationGroup.primary"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.MutationGroup.primary" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.experimental.spannerio.</code><code class="descname">WriteMutation</code><span class="sig-paren">(</span><em>insert=None</em>, <em>update=None</em>, <em>insert_or_update=None</em>, <em>replace=None</em>, <em>delete=None</em>, <em>columns=None</em>, <em>values=None</em>, <em>keyset=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.9)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>A convenience class for creating Spanner write mutations. The user can
provide the operation via the constructor or via the static methods.</p>
<p>Note: When passing the operation via the constructor, be aware that it
accepts only one operation at a time. For example, passing a table name in
the <cite>insert</cite> parameter and also passing a value for the <cite>update</cite>
parameter will cause an error.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>insert</strong> – (Optional) Name of the table in which rows will be inserted.</li>
<li><strong>update</strong> – (Optional) Name of the table in which existing rows will be
updated.</li>
<li><strong>insert_or_update</strong> – (Optional) Table name in which rows will be written.
Like insert, except that if the row already exists, then its column
values are overwritten with the ones provided. Any column values not
explicitly written are preserved.</li>
<li><strong>replace</strong> – (Optional) Table name in which rows will be replaced. Like
insert, except that if the row already exists, it is deleted, and the
column values provided are inserted instead. Unlike <cite>insert_or_update</cite>,
this means any values not explicitly written become <cite>NULL</cite>.</li>
<li><strong>delete</strong> – (Optional) Table name from which rows will be deleted. Succeeds
whether or not the named rows were present.</li>
<li><strong>columns</strong> – The names of the columns in table to be written. The list of
columns must contain enough columns to allow Cloud Spanner to derive
values for all primary key columns in the row(s) to be modified.</li>
<li><strong>values</strong> – The values to be written. <cite>values</cite> can contain more than one
list of values. If it does, then multiple rows are written, one for
each entry in <cite>values</cite>. Each list in <cite>values</cite> must have exactly as
many entries as there are entries in columns above. Sending multiple
lists is equivalent to sending multiple Mutations, each containing one
<cite>values</cite> entry and repeating table and columns.</li>
<li><strong>keyset</strong> – (Optional) The primary keys of the rows within table to delete.
Delete is idempotent. The transaction will succeed even if some or
all rows do not exist.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="staticmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation.insert">
<em class="property">static </em><code class="descname">insert</code><span class="sig-paren">(</span><em>table</em>, <em>columns</em>, <em>values</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation.insert"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation.insert" title="Permalink to this definition"></a></dt>
<dd><p>Insert one or more new table rows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – Name of the table to be modified.</li>
<li><strong>columns</strong> – Name of the table columns to be modified.</li>
<li><strong>values</strong> – Values to be modified.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation.update">
<em class="property">static </em><code class="descname">update</code><span class="sig-paren">(</span><em>table</em>, <em>columns</em>, <em>values</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation.update"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation.update" title="Permalink to this definition"></a></dt>
<dd><p>Update one or more existing table rows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – Name of the table to be modified.</li>
<li><strong>columns</strong> – Name of the table columns to be modified.</li>
<li><strong>values</strong> – Values to be modified.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation.insert_or_update">
<em class="property">static </em><code class="descname">insert_or_update</code><span class="sig-paren">(</span><em>table</em>, <em>columns</em>, <em>values</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation.insert_or_update"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation.insert_or_update" title="Permalink to this definition"></a></dt>
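<dd><p>Insert/update one or more table rows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – Name of the table to be modified.</li>
<li><strong>columns</strong> – Name of the table columns to be modified.</li>
<li><strong>values</strong> – Values to be modified.</li>
</ul>
</td>
</tr>
</tbody>
</table>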
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation.replace">
<em class="property">static </em><code class="descname">replace</code><span class="sig-paren">(</span><em>table</em>, <em>columns</em>, <em>values</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation.replace"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation.replace" title="Permalink to this definition"></a></dt>
<dd><p>Replace one or more table rows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – Name of the table to be modified.</li>
<li><strong>columns</strong> – Name of the table columns to be modified.</li>
<li><strong>values</strong> – Values to be modified.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.io.gcp.experimental.spannerio.WriteMutation.delete">
<em class="property">static </em><code class="descname">delete</code><span class="sig-paren">(</span><em>table</em>, <em>keyset</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/experimental/spannerio.html#WriteMutation.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.experimental.spannerio.WriteMutation.delete" title="Permalink to this definition"></a></dt>
<dd><p>Delete one or more table rows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> – Name of the table to be modified.</li>
<li><strong>keyset</strong> – Keys/ranges identifying rows to delete.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="apache_beam.io.gcp.big_query_query_to_table_pipeline.html" class="btn btn-neutral float-right" title="apache_beam.io.gcp.big_query_query_to_table_pipeline module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="apache_beam.io.gcp.experimental.html" class="btn btn-neutral float-left" title="apache_beam.io.gcp.experimental package" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>