<!-- blob: 4474ff0ed989b75c51814a01983ff01702fcaaa9 [file] [log] [blame] -->
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>apache_beam.ml.rag.ingestion.mysql_common module &mdash; Apache Beam 2.68.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=b86133f3" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=2388e03a"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.ml.rag.ingestion.postgres module" href="apache_beam.ml.rag.ingestion.postgres.html" />
<link rel="prev" title="apache_beam.ml.rag.ingestion.mysql module" href="apache_beam.ml.rag.ingestion.mysql.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.ml.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.anomaly.html">apache_beam.ml.anomaly package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.gcp.html">apache_beam.ml.gcp package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.inference.html">apache_beam.ml.inference package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.ml.rag.html">apache_beam.ml.rag package</a><ul class="current">
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.ml.rag.html#subpackages">Subpackages</a></li>
<li class="toctree-l4"><a class="reference internal" href="apache_beam.ml.rag.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.transforms.html">apache_beam.ml.transforms package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.ml.ts.html">apache_beam.ml.ts package</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="breadcrumb-item"><a href="apache_beam.ml.rag.html">apache_beam.ml.rag package</a></li>
<li class="breadcrumb-item"><a href="apache_beam.ml.rag.ingestion.html">apache_beam.ml.rag.ingestion package</a></li>
<li class="breadcrumb-item active">apache_beam.ml.rag.ingestion.mysql_common module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.ml.rag.ingestion.mysql_common.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-apache_beam.ml.rag.ingestion.mysql_common">
<span id="apache-beam-ml-rag-ingestion-mysql-common-module"></span><h1>apache_beam.ml.rag.ingestion.mysql_common module<a class="headerlink" href="#module-apache_beam.ml.rag.ingestion.mysql_common" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.chunk_embedding_fn">
<span class="sig-prename descclassname"><span class="pre">apache_beam.ml.rag.ingestion.mysql_common.</span></span><span class="sig-name descname"><span class="pre">chunk_embedding_fn</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">chunk</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#chunk_embedding_fn"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.chunk_embedding_fn" title="Link to this definition"></a></dt>
<dd><p>Convert embedding to MySQL vector string format.</p>
<p>Formats the dense embedding as a MySQL-compatible vector string.
Example: [1.0, 2.0] -&gt; '[1.0,2.0]'</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>chunk</strong> – Input Chunk object.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>MySQL vector string representation of the embedding.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a></p>
</dd>
<dt class="field-even">Raises<span class="colon">:</span></dt>
<dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.13)"><strong>ValueError</strong></a> – If chunk has no dense embedding.</p>
</dd>
</dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.ml.rag.ingestion.mysql_common.</span></span><span class="sig-name descname"><span class="pre">ColumnSpec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">python_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Type" title="(in Python v3.13)"><span class="pre">Type</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">value_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">placeholder</span></span><span 
class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'?'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>Specification for mapping Chunk fields to MySQL columns for insertion.</p>
<p>Defines how to extract and format values from Chunks into MySQL database
columns, handling the full pipeline from Python value to SQL insertion.</p>
<p>The insertion process works as follows:</p>
<ul class="simple">
<li><p>value_fn extracts a value from the Chunk and formats it as needed</p></li>
<li><p>The value is stored in a NamedTuple field with the specified python_type</p></li>
<li><p>During SQL insertion, the value is bound to a ? placeholder</p></li>
</ul>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.column_name">
<span class="sig-name descname"><span class="pre">column_name</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.column_name" title="Link to this definition"></a></dt>
<dd><p>The column name in the database table.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a></p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.python_type">
<span class="sig-name descname"><span class="pre">python_type</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.python_type" title="Link to this definition"></a></dt>
<dd><p>Python type for the NamedTuple field that will hold the
value. Must be compatible with
<a class="reference internal" href="apache_beam.coders.row_coder.html#apache_beam.coders.row_coder.RowCoder" title="apache_beam.coders.row_coder.RowCoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">RowCoder</span></code></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>Type</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.value_fn">
<span class="sig-name descname"><span class="pre">value_fn</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.value_fn" title="Link to this definition"></a></dt>
<dd><p>Function to extract and format the value from a Chunk.
Takes a Chunk and returns a value of python_type.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>Callable[[<a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk">apache_beam.ml.rag.types.Chunk</a>], Any]</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.placeholder">
<span class="sig-name descname"><span class="pre">placeholder</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.placeholder" title="Link to this definition"></a></dt>
<dd><p>Optional placeholder expression used to apply typecasts or functions
to the bound ? value, e.g. "string_to_vector(?)" for vector columns.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a></p>
</dd>
</dl>
</dd></dl>
<p class="rubric">Examples</p>
<p>Basic text column (uses standard JDBC type mapping):</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">ColumnSpec</span><span class="o">.</span><span class="n">text</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;content&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">value_fn</span><span class="o">=</span><span class="k">lambda</span> <span class="n">chunk</span><span class="p">:</span> <span class="n">chunk</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">text</span>
<span class="gp">... </span><span class="p">)</span>
<span class="gp">... </span><span class="c1"># Results in: INSERT INTO table (content) VALUES (?)</span>
</pre></div>
</div>
<p>Timestamp from metadata:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">ColumnSpec</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;created_at&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">python_type</span><span class="o">=</span><span class="nb">str</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">value_fn</span><span class="o">=</span><span class="k">lambda</span> <span class="n">chunk</span><span class="p">:</span> <span class="n">chunk</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;timestamp&quot;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">)</span>
<span class="gp">... </span><span class="c1"># Results in: INSERT INTO table (created_at) VALUES (?)</span>
</pre></div>
</div>
<dl class="simple">
<dt>Factory Methods:</dt><dd><ul class="simple">
<li><p>text: Creates a text column specification.</p></li>
<li><p>integer: Creates an integer column specification.</p></li>
<li><p>float: Creates a float column specification.</p></li>
<li><p>vector: Creates a vector column specification with string_to_vector().</p></li>
<li><p>json: Creates a JSON column specification.</p></li>
</ul>
</dd>
</dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id0">
<span class="sig-name descname"><span class="pre">column_name</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></em><a class="headerlink" href="#id0" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id1">
<span class="sig-name descname"><span class="pre">python_type</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Type" title="(in Python v3.13)"><span class="pre">Type</span></a></em><a class="headerlink" href="#id1" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id2">
<span class="sig-name descname"><span class="pre">value_fn</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#id2" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id3">
<span class="sig-name descname"><span class="pre">placeholder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'?'</span></em><a class="headerlink" href="#id3" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.text">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">text</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">value_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec.text"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" 
href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.text" title="Link to this definition"></a></dt>
<dd><p>Create a text column specification.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.integer">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">integer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">value_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec.integer"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a 
class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.integer" title="Link to this definition"></a></dt>
<dd><p>Create an integer column specification.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.float">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">float</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">value_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec.float"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" 
href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.float" title="Link to this definition"></a></dt>
<dd><p>Create a float column specification.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.vector">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">vector</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">column_name:</span> <span class="pre">str,</span> <span class="pre">value_fn:</span> <span class="pre">Callable[[Chunk],</span> <span class="pre">Any]</span> <span class="pre">=</span> <span class="pre">&lt;function</span> <span class="pre">chunk_embedding_fn&gt;</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec.vector"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.vector" title="Link to this definition"></a></dt>
<dd><p>Create a vector column specification with string_to_vector() function.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.json">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">value_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="apache_beam.ml.rag.types.html#apache_beam.ml.rag.types.Chunk" title="apache_beam.ml.rag.types.Chunk"><span class="pre">Chunk</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpec.json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" 
href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec.json" title="Link to this definition"></a></dt>
<dd><p>Create a JSON column specification.</p>
</dd></dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.embedding_to_string">
<span class="sig-prename descclassname"><span class="pre">apache_beam.ml.rag.ingestion.mysql_common.</span></span><span class="sig-name descname"><span class="pre">embedding_to_string</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">embedding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.List" title="(in Python v3.13)"><span class="pre">List</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.13)"><span class="pre">float</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#embedding_to_string"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.embedding_to_string" title="Link to this definition"></a></dt>
<dd><p>Convert embedding to MySQL vector string format.</p>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.ml.rag.ingestion.mysql_common.</span></span><span class="sig-name descname"><span class="pre">ColumnSpecsBuilder</span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>Builder for <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> instances with chainable methods.</p>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_defaults">
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">with_defaults</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.with_defaults"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_defaults" title="Link to this definition"></a></dt>
<dd><p>Add all default column specifications.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_id_spec">
<span class="sig-name descname"><span class="pre">with_id_spec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">column_name:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'id',</span> <span class="pre">python_type:</span> <span class="pre">~typing.Type</span> <span class="pre">=</span> <span class="pre">&lt;class</span> <span class="pre">'str'&gt;,</span> <span class="pre">convert_fn:</span> <span class="pre">~typing.Callable[[str],</span> <span class="pre">~typing.Any]</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.with_id_spec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_id_spec" title="Link to this definition"></a></dt>
<dd><p>Add ID <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> with optional type and conversion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>column_name</strong> – Name for the ID column (defaults to “id”)</p></li>
<li><p><strong>python_type</strong> – Python type for the column (defaults to str)</p></li>
<li><p><strong>convert_fn</strong> – Optional function to convert the chunk ID.
If None, uses ID as-is.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Example</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">builder</span><span class="o">.</span><span class="n">with_id_spec</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;doc_id&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">python_type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">convert_fn</span><span class="o">=</span><span class="k">lambda</span> <span class="nb">id</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="nb">id</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)[</span><span class="mi">1</span><span class="p">])</span>
<span class="gp">... </span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_content_spec">
<span class="sig-name descname"><span class="pre">with_content_spec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">column_name:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'content',</span> <span class="pre">python_type:</span> <span class="pre">~typing.Type</span> <span class="pre">=</span> <span class="pre">&lt;class</span> <span class="pre">'str'&gt;,</span> <span class="pre">convert_fn:</span> <span class="pre">~typing.Callable[[str],</span> <span class="pre">~typing.Any]</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.with_content_spec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_content_spec" title="Link to this definition"></a></dt>
<dd><p>Add content <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> with optional type and conversion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>column_name</strong> – Name for the content column (defaults to “content”)</p></li>
<li><p><strong>python_type</strong> – Python type for the column (defaults to str)</p></li>
<li><p><strong>convert_fn</strong> – Optional function to convert the content text.
If None, uses content text as-is.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Example</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">builder</span><span class="o">.</span><span class="n">with_content_spec</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;content_length&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">python_type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">convert_fn</span><span class="o">=</span><span class="nb">len</span> <span class="c1"># Store content length instead of content</span>
<span class="gp">... </span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_metadata_spec">
<span class="sig-name descname"><span class="pre">with_metadata_spec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">column_name:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'metadata',</span> <span class="pre">python_type:</span> <span class="pre">~typing.Type</span> <span class="pre">=</span> <span class="pre">&lt;class</span> <span class="pre">'str'&gt;,</span> <span class="pre">convert_fn:</span> <span class="pre">~typing.Callable[[~typing.Dict[str,</span> <span class="pre">~typing.Any]],</span> <span class="pre">~typing.Any]</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.with_metadata_spec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_metadata_spec" title="Link to this definition"></a></dt>
<dd><p>Add metadata <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> with optional type and conversion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>column_name</strong> – Name for the metadata column (defaults to “metadata”)</p></li>
<li><p><strong>python_type</strong> – Python type for the column (defaults to str)</p></li>
<li><p><strong>convert_fn</strong> – Optional function to convert the metadata dictionary.
If None and python_type is str, converts to JSON string.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Example</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">builder</span><span class="o">.</span><span class="n">with_metadata_spec</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;meta_tags&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">python_type</span><span class="o">=</span><span class="nb">str</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">convert_fn</span><span class="o">=</span><span class="k">lambda</span> <span class="n">meta</span><span class="p">:</span> <span class="s1">&#39;,&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">meta</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="gp">... </span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_embedding_spec">
<span class="sig-name descname"><span class="pre">with_embedding_spec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">column_name:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'embedding',</span> <span class="pre">convert_fn:</span> <span class="pre">~typing.Callable[[~typing.List[float]],</span> <span class="pre">~typing.Any]</span> <span class="pre">=</span> <span class="pre">&lt;function</span> <span class="pre">embedding_to_string&gt;</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.with_embedding_spec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.with_embedding_spec" title="Link to this definition"></a></dt>
<dd><p>Add embedding <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> with optional conversion.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>column_name</strong> – Name for the embedding column (defaults to “embedding”)</p></li>
<li><p><strong>convert_fn</strong> – Optional function to convert the dense embedding values.
If None, uses default MySQL vector format.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Example</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">builder</span><span class="o">.</span><span class="n">with_embedding_spec</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">column_name</span><span class="o">=</span><span class="s2">&quot;embedding_vector&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">convert_fn</span><span class="o">=</span><span class="k">lambda</span> <span class="n">values</span><span class="p">:</span> <span class="s1">&#39;[&#39;</span> <span class="o">+</span> <span class="s1">&#39;,&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">x</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">values</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;]&#39;</span>
<span class="gp">... </span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.add_metadata_field">
<span class="sig-name descname"><span class="pre">add_metadata_field</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">field</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">python_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Type" title="(in Python v3.13)"><span class="pre">Type</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convert_fn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference external" 
href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">default</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.13)"><span class="pre">Any</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference 
internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.add_metadata_field"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.add_metadata_field" title="Link to this definition"></a></dt>
<dd><p>Add a <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> that extracts and converts a field from
chunk metadata.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>field</strong> – Key to extract from chunk metadata</p></li>
<li><p><strong>python_type</strong> – Python type for the column (e.g. str, int, float)</p></li>
<li><p><strong>column_name</strong> – Name for the column (defaults to metadata field name)</p></li>
<li><p><strong>convert_fn</strong> – Optional function to convert the extracted value to
desired type. If None, value is used as-is</p></li>
<li><p><strong>default</strong> – Default value if field is missing from metadata</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Examples</p>
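<p>Simple string field:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; builder.add_metadata_field(&quot;source&quot;, str)
</pre></div>
</div>
<p>Integer with default:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; builder.add_metadata_field(
...     field=&quot;count&quot;,
...     python_type=int,
...     column_name=&quot;item_count&quot;,
...     default=0
... )
</pre></div>
</div>
<p>Float with conversion and default:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; builder.add_metadata_field(
...     field=&quot;confidence&quot;,
...     python_type=float,
...     convert_fn=lambda x: round(float(x), 2),
...     default=0.0
... )
</pre></div>
</div>
<p>Timestamp with conversion:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; builder.add_metadata_field(
...     field=&quot;created_at&quot;,
...     python_type=str,
...     convert_fn=lambda ts: ts.replace('T', ' ')
... )
</pre></div>
</div>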
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.add_custom_column_spec">
<span class="sig-name descname"><span class="pre">add_custom_column_spec</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">spec</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder"><span class="pre">ColumnSpecsBuilder</span></a></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.add_custom_column_spec"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.add_custom_column_spec" title="Link to this definition"></a></dt>
<dd><p>Add a custom <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> to the builder.</p>
<p>Use this method when you need complete control over the
<a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a>, including custom value extraction and type handling.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>spec</strong> – A <a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><code class="xref py py-class docutils literal notranslate"><span class="pre">ColumnSpec</span></code></a> instance defining the column name, type,
value extraction, and optional MySQL function.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Self for method chaining</p>
</dd>
</dl>
<p class="rubric">Examples</p>
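<p>Custom text column from chunk metadata:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; builder.add_custom_column_spec(
...     ColumnSpec.text(
...         column_name=&quot;source_and_id&quot;,
...         value_fn=lambda chunk:
...             f&quot;{chunk.metadata.get('source')}_{chunk.id}&quot;
...     )
... )
</pre></div>
</div>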
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.build">
<span class="sig-name descname"><span class="pre">build</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.List" title="(in Python v3.13)"><span class="pre">List</span></a><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec" title="apache_beam.ml.rag.ingestion.mysql_common.ColumnSpec"><span class="pre">ColumnSpec</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ColumnSpecsBuilder.build"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ColumnSpecsBuilder.build" title="Link to this definition"></a></dt>
<dd><p>Build the final list of column specifications.</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.ml.rag.ingestion.mysql_common.</span></span><span class="sig-name descname"><span class="pre">ConflictResolution</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">action</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Literal" title="(in Python v3.13)"><span class="pre">Literal</span></a><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'UPDATE'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'IGNORE'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'UPDATE'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">update_fields</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.List" title="(in Python v3.13)"><span class="pre">List</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span 
class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">primary_key_field</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/ml/rag/ingestion/mysql_common.html#ConflictResolution"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>Specification for how to handle conflicts during insert.</p>
<p>Configures conflict handling behavior when inserting records that may
violate unique constraints using MySQL’s ON DUPLICATE KEY UPDATE syntax.</p>
<p>MySQL automatically detects conflicts based on PRIMARY KEY or UNIQUE
constraints defined on the table.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.action">
<span class="sig-name descname"><span class="pre">action</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.action" title="Link to this definition"></a></dt>
<dd><p>How to handle conflicts - either “UPDATE” or “IGNORE”.
UPDATE: Updates existing record with new values.
IGNORE: Skips conflicting records (uses no-op update).</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>Literal['UPDATE', 'IGNORE']</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.update_fields">
<span class="sig-name descname"><span class="pre">update_fields</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.update_fields" title="Link to this definition"></a></dt>
<dd><p>Optional list of fields to update on conflict. If None,
all fields are updated (for UPDATE action only).</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>List[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a>] | None</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.primary_key_field">
<span class="sig-name descname"><span class="pre">primary_key_field</span></span><a class="headerlink" href="#apache_beam.ml.rag.ingestion.mysql_common.ConflictResolution.primary_key_field" title="Link to this definition"></a></dt>
<dd><p>Required for IGNORE action. The primary key field
name to use for the no-op update.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a> | None</p>
</dd>
</dl>
</dd></dl>
<p class="rubric">Examples</p>
<p>Update all fields on conflict:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; ConflictResolution(action="UPDATE")
</pre></div>
</div>
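<p>Update specific fields on conflict:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; ConflictResolution(
...     action="UPDATE",
...     update_fields=["embedding", "content"]
... )
</pre></div>
</div>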
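<p>Ignore conflicts with explicit primary key:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; ConflictResolution(
...     action="IGNORE",
...     primary_key_field="id"
... )
</pre></div>
</div>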
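<p>Ignore conflicts with custom primary key:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre>&gt;&gt;&gt; ConflictResolution(
...     action="IGNORE",
...     primary_key_field="custom_id"
... )
</pre></div>
</div>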
<dl class="py attribute">
<dt class="sig sig-object py" id="id4">
<span class="sig-name descname"><span class="pre">action</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Literal" title="(in Python v3.13)"><span class="pre">Literal</span></a><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'UPDATE'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'IGNORE'</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'UPDATE'</span></em><a class="headerlink" href="#id4" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id5">
<span class="sig-name descname"><span class="pre">update_fields</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.List" title="(in Python v3.13)"><span class="pre">List</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id5" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="id6">
<span class="sig-name descname"><span class="pre">primary_key_field</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#id6" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="apache_beam.ml.rag.ingestion.mysql.html" class="btn btn-neutral float-left" title="apache_beam.ml.rag.ingestion.mysql module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="apache_beam.ml.rag.ingestion.postgres.html" class="btn btn-neutral float-right" title="apache_beam.ml.rag.ingestion.postgres module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright Apache Beam.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>