pydoc/2.67.0/apache_beam.dataframe.io.html - beam-site - Git at Google



 <!DOCTYPE html>
 <html class="writer-html5" lang="en" data-content_root="./">
 <head>
   <meta charset="utf-8" />

   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   <title>apache_beam.dataframe.io module &mdash; Apache Beam 2.67.0 documentation</title>
       <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=b86133f3" />
       <link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />


       <script src="_static/jquery.js?v=5d32c60e"></script>
       <script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
       <script src="_static/documentation_options.js?v=959b4fbe"></script>
       <script src="_static/doctools.js?v=9a2dae69"></script>
       <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
     <script src="_static/js/theme.js"></script>
     <link rel="index" title="Index" href="genindex.html" />
     <link rel="search" title="Search" href="search.html" />
     <link rel="next" title="apache_beam.dataframe.pandas_top_level_functions module" href="apache_beam.dataframe.pandas_top_level_functions.html" />
     <link rel="prev" title="apache_beam.dataframe.frames module" href="apache_beam.dataframe.frames.html" />
 </head>

 <body class="wy-body-for-nav">
   <div class="wy-grid-for-nav">
     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
       <div class="wy-side-scroll">
         <div class="wy-side-nav-search" >


           <a href="index.html" class="icon icon-home">
             Apache Beam
           </a>
 <div role="search">
   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
     <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
     <input type="hidden" name="check_keywords" value="yes" />
     <input type="hidden" name="area" value="default" />
   </form>
 </div>
         </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
               <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
 <li class="toctree-l1 current"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a><ul class="current">
 <li class="toctree-l2 current"><a class="reference internal" href="apache_beam.dataframe.html#submodules">Submodules</a><ul class="current">
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.convert.html">apache_beam.dataframe.convert module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.doctests.html">apache_beam.dataframe.doctests module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.expressions.html">apache_beam.dataframe.expressions module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.frame_base.html">apache_beam.dataframe.frame_base module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.frames.html">apache_beam.dataframe.frames module</a></li>
 <li class="toctree-l3 current"><a class="current reference internal" href="#">apache_beam.dataframe.io module</a><ul>
 <li class="toctree-l4"><a class="reference internal" href="#sources">Sources</a></li>
 <li class="toctree-l4"><a class="reference internal" href="#sinks">Sinks</a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_gbq"><code class="docutils literal notranslate"><span class="pre">read_gbq()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_csv"><code class="docutils literal notranslate"><span class="pre">read_csv()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_csv"><code class="docutils literal notranslate"><span class="pre">to_csv()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_fwf"><code class="docutils literal notranslate"><span class="pre">read_fwf()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_json"><code class="docutils literal notranslate"><span class="pre">read_json()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_json"><code class="docutils literal notranslate"><span class="pre">to_json()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_html"><code class="docutils literal notranslate"><span class="pre">read_html()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_html"><code class="docutils literal notranslate"><span class="pre">to_html()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.ReadViaPandas"><code class="docutils literal notranslate"><span class="pre">ReadViaPandas</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.WriteViaPandas"><code class="docutils literal notranslate"><span class="pre">WriteViaPandas</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_excel"><code class="docutils literal notranslate"><span class="pre">read_excel()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_feather"><code class="docutils literal notranslate"><span class="pre">read_feather()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_parquet"><code class="docutils literal notranslate"><span class="pre">read_parquet()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_sas"><code class="docutils literal notranslate"><span class="pre">read_sas()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_spss"><code class="docutils literal notranslate"><span class="pre">read_spss()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.read_stata"><code class="docutils literal notranslate"><span class="pre">read_stata()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_excel"><code class="docutils literal notranslate"><span class="pre">to_excel()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_feather"><code class="docutils literal notranslate"><span class="pre">to_feather()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_parquet"><code class="docutils literal notranslate"><span class="pre">to_parquet()</span></code></a></li>
 <li class="toctree-l4"><a class="reference internal" href="#apache_beam.dataframe.io.to_stata"><code class="docutils literal notranslate"><span class="pre">to_stata()</span></code></a></li>
 </ul>
 </li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.pandas_top_level_functions.html">apache_beam.dataframe.pandas_top_level_functions module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.partitionings.html">apache_beam.dataframe.partitionings module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.schemas.html">apache_beam.dataframe.schemas module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.transforms.html">apache_beam.dataframe.transforms module</a></li>
 </ul>
 </li>
 </ul>
 </li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li>
 </ul>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
 </ul>

         </div>
       </div>
     </nav>

     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
           <a href="index.html">Apache Beam</a>
       </nav>

       <div class="wy-nav-content">
         <div class="rst-content">
           <div role="navigation" aria-label="Page navigation">
   <ul class="wy-breadcrumbs">
       <li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
           <li class="breadcrumb-item"><a href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
       <li class="breadcrumb-item active">apache_beam.dataframe.io module</li>
       <li class="wy-breadcrumbs-aside">
             <a href="_sources/apache_beam.dataframe.io.rst.txt" rel="nofollow"> View page source</a>
       </li>
   </ul>
   <hr/>
 </div>
           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
            <div itemprop="articleBody">

   <section id="module-apache_beam.dataframe.io">
 <span id="apache-beam-dataframe-io-module"></span><h1>apache_beam.dataframe.io module<a class="headerlink" href="#module-apache_beam.dataframe.io" title="Link to this heading"></a></h1>
 <p>Sources and sinks for the Beam DataFrame API.</p>
 <section id="sources">
 <h2>Sources<a class="headerlink" href="#sources" title="Link to this heading"></a></h2>
 <p>This module provides analogs for pandas <code class="docutils literal notranslate"><span class="pre">read</span></code> methods, like
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.read_csv.html#pandas.read_csv" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-func docutils literal notranslate"><span class="pre">pandas.read_csv()</span></code></a>. However, Beam sources like <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a>
 create a Beam <code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code>, and return a
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> or
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a> representing the contents
 of the referenced file(s) or data source.</p>
 <p>The result of these methods must be applied to a <code class="xref py py-class docutils literal notranslate"><span class="pre">Pipeline</span></code>
 object, for example:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
 </pre></div>
 </div>
 </section>
 <section id="sinks">
 <h2>Sinks<a class="headerlink" href="#sinks" title="Link to this heading"></a></h2>
 <p>This module also defines analogs for pandas sink, or <code class="docutils literal notranslate"><span class="pre">to</span></code>, methods that
 generate a Beam <code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code>. Users should prefer calling
 these operations from <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a>
 instances (for example with
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></a>).</p>
 </section>
 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_gbq">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_gbq</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">project_id</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_bqstorage_api</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_gbq"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_gbq" title="Link to this definition"></a></dt>
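 <dd><p>This function reads data from a BigQuery table and produces a
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a>.</p>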
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The table to read. It can be given in the format
 ‘PROJECT:dataset.table’, in which case the dataset and project_id
 parameters below can be omitted.</p></li>
 <li><p><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The dataset to read from
 (can be omitted if the table was specified as ‘PROJECT:dataset.table’).</p></li>
 <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The project ID
 (can be omitted if the table was specified as ‘PROJECT:dataset.table’).</p></li>
 <li><p><strong>use_bqstorage_api</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Set this flag to true to use
 the BigQuery Storage API in ReadFromBigQuery. Otherwise, set it
 to false or leave it unspecified.</p></li>
 </ul>
 </dd>
 </dl>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_csv">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">splittable</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">binary</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_csv"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_csv" title="Link to this definition"></a></dt>
 <dd><p>Read a comma-separated values (csv) file into DataFrame.</p>
 <p>Also supports optionally iterating over the file or breaking it
 into chunks.</p>
 <p>Additional help can be found in the online docs for
 <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html">IO Tools</a>.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
 expected. A local file could be: <a class="reference external" href="file://localhost/path/to/table.csv">file://localhost/path/to/table.csv</a>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method, such as
 a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function) or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </p></li>
 <li><p><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – Character or regex pattern to treat as the delimiter. If <code class="docutils literal notranslate"><span class="pre">sep=None</span></code>, the
 C engine cannot automatically detect
 the separator, but the Python parsing engine can, meaning the latter will
 be used and automatically detect the separator from only the first valid
 row of the file by Python’s builtin sniffer tool, <code class="docutils literal notranslate"><span class="pre">csv.Sniffer</span></code>.
 In addition, separators longer than 1 character and different from
 <code class="docutils literal notranslate"><span class="pre">'\s+'</span></code> will be interpreted as regular expressions and will also force
 the use of the Python parsing engine. Note that regex delimiters are prone
 to ignoring quoted data. Regex example: <code class="docutils literal notranslate"><span class="pre">'\r\t'</span></code>.</p></li>
 <li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Alias for <code class="docutils literal notranslate"><span class="pre">sep</span></code>.</p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>Sequence</em><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>'infer'</em><em> or </em><em>None</em><em>, </em><em>default 'infer'</em>) – Row number(s) containing column labels and marking the start of the
 data (zero-indexed). Default behavior is to infer the column names: if no <code class="docutils literal notranslate"><span class="pre">names</span></code>
 are passed the behavior is identical to <code class="docutils literal notranslate"><span class="pre">header=0</span></code> and column
 names are inferred from the first line of the file, if column
 names are passed explicitly to <code class="docutils literal notranslate"><span class="pre">names</span></code> then the behavior is identical to
 <code class="docutils literal notranslate"><span class="pre">header=None</span></code>. Explicitly pass <code class="docutils literal notranslate"><span class="pre">header=0</span></code> to be able to
 replace existing names. The header can be a list of integers that
 specify row locations for a <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.MultiIndex.html#pandas.MultiIndex" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code></a> on the columns
 e.g. <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1,</span> <span class="pre">3]</span></code>. Intervening rows that are not specified will be
 skipped (e.g. 2 in this example is skipped). Note that this
 parameter ignores commented lines and empty lines if
 <code class="docutils literal notranslate"><span class="pre">skip_blank_lines=True</span></code>, so <code class="docutils literal notranslate"><span class="pre">header=0</span></code> denotes the first line of
 data rather than the first line of the file.</p></li>
 <li><p><strong>names</strong> (<em>Sequence</em><em> of </em><em>Hashable</em><em>, </em><em>optional</em>) – Sequence of column labels to apply. If the file contains a header row,
 then you should explicitly pass <code class="docutils literal notranslate"><span class="pre">header=0</span></code> to override the column names.
 Duplicates in this list are not allowed.</p></li>
 <li><p><strong>index_col</strong> (<em>Hashable</em><em>, </em><em>Sequence</em><em> of </em><em>Hashable</em><em> or </em><em>False</em><em>, </em><em>optional</em>) – <p>Column(s) to use as row label(s), denoted either by column labels or column
 indices.  If a sequence of labels or indices is given, <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.MultiIndex.html#pandas.MultiIndex" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code></a>
 will be formed for the row labels.</p>
 <p>Note: <code class="docutils literal notranslate"><span class="pre">index_col=False</span></code> can be used to force pandas to <em>not</em> use the first
 column as the index, e.g., when you have a malformed file with delimiters at
 the end of each line.</p>
 </p></li>
 <li><p><strong>usecols</strong> (<em>Sequence</em><em> of </em><em>Hashable</em><em> or </em><em>Callable</em><em>, </em><em>optional</em>) – <p>Subset of columns to select, denoted either by column labels or column indices.
 If list-like, all elements must either
 be positional (i.e. integer indices into the document columns) or strings
 that correspond to column names provided either by the user in <code class="docutils literal notranslate"><span class="pre">names</span></code> or
 inferred from the document header row(s). If <code class="docutils literal notranslate"><span class="pre">names</span></code> are given, the document
 header row(s) are not taken into account. For example, a valid list-like
 <code class="docutils literal notranslate"><span class="pre">usecols</span></code> parameter would be <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1,</span> <span class="pre">2]</span></code> or <code class="docutils literal notranslate"><span class="pre">['foo',</span> <span class="pre">'bar',</span> <span class="pre">'baz']</span></code>.
 Element order is ignored, so <code class="docutils literal notranslate"><span class="pre">usecols=[0,</span> <span class="pre">1]</span></code> is the same as <code class="docutils literal notranslate"><span class="pre">[1,</span> <span class="pre">0]</span></code>.
 To instantiate a <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> from <code class="docutils literal notranslate"><span class="pre">data</span></code> with element order
 preserved use <code class="docutils literal notranslate"><span class="pre">pd.read_csv(data,</span> <span class="pre">usecols=['foo',</span> <span class="pre">'bar'])[['foo',</span> <span class="pre">'bar']]</span></code>
 for columns in <code class="docutils literal notranslate"><span class="pre">['foo',</span> <span class="pre">'bar']</span></code> order or
 <code class="docutils literal notranslate"><span class="pre">pd.read_csv(data,</span> <span class="pre">usecols=['foo',</span> <span class="pre">'bar'])[['bar',</span> <span class="pre">'foo']]</span></code>
 for <code class="docutils literal notranslate"><span class="pre">['bar',</span> <span class="pre">'foo']</span></code> order.</p>
 <p>If callable, the callable function will be evaluated against the column
 names, returning names where the callable function evaluates to <code class="docutils literal notranslate"><span class="pre">True</span></code>. An
 example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span> <span class="pre">x:</span> <span class="pre">x.upper()</span> <span class="pre">in</span>
 <span class="pre">['AAA',</span> <span class="pre">'BBB',</span> <span class="pre">'DDD']</span></code>. Using this parameter results in much faster
 parsing time and lower memory usage.</p>
 </p></li>
 <li><p><strong>dtype</strong> (<em>dtype</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>{Hashable : dtype}</em><em>, </em><em>optional</em>) – <p>Data type(s) to apply to either the whole dataset or individual columns.
 E.g., <code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">np.float64,</span> <span class="pre">'b':</span> <span class="pre">np.int32,</span> <span class="pre">'c':</span> <span class="pre">'Int64'}</span></code>
 Use <code class="docutils literal notranslate"><span class="pre">str</span></code> or <code class="docutils literal notranslate"><span class="pre">object</span></code> together with suitable <code class="docutils literal notranslate"><span class="pre">na_values</span></code> settings
 to preserve and not interpret <code class="docutils literal notranslate"><span class="pre">dtype</span></code>.
 If <code class="docutils literal notranslate"><span class="pre">converters</span></code> are specified, they will be applied INSTEAD
 of <code class="docutils literal notranslate"><span class="pre">dtype</span></code> conversion.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Support for <code class="docutils literal notranslate"><span class="pre">defaultdict</span></code> was added. Specify a <code class="docutils literal notranslate"><span class="pre">defaultdict</span></code> as input where
 the default determines the <code class="docutils literal notranslate"><span class="pre">dtype</span></code> of the columns which are not explicitly
 listed.</p>
 </div>
 </p></li>
 <li><p><strong>engine</strong> (<em>{'c'</em><em>, </em><em>'python'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>optional</em>) – <p>Parser engine to use. The C and pyarrow engines are faster, while the python engine
 is currently more feature-complete. Multithreading is currently only supported by
 the pyarrow engine.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.4.0: </span>The ‘pyarrow’ engine was added as an <em>experimental</em> engine, and some features
 are unsupported, or may not work correctly, with this engine.</p>
 </div>
 </p></li>
 <li><p><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>{Hashable : Callable}</em><em>, </em><em>optional</em>) – Functions for converting values in specified columns. Keys can either
 be column labels or column indices.</p></li>
 <li><p><strong>true_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>optional</em>) – Values to consider as <code class="docutils literal notranslate"><span class="pre">True</span></code> in addition to case-insensitive variants of ‘True’.</p></li>
 <li><p><strong>false_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>optional</em>) – Values to consider as <code class="docutils literal notranslate"><span class="pre">False</span></code> in addition to case-insensitive variants of ‘False’.</p></li>
 <li><p><strong>skipinitialspace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Skip spaces after delimiter.</p></li>
 <li><p><strong>skiprows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> or </em><em>Callable</em><em>, </em><em>optional</em>) – <p>Line numbers to skip (0-indexed) or number of lines to skip (<code class="docutils literal notranslate"><span class="pre">int</span></code>)
 at the start of the file.</p>
 <p>If callable, the callable function will be evaluated against the row
 indices, returning <code class="docutils literal notranslate"><span class="pre">True</span></code> if the row should be skipped and <code class="docutils literal notranslate"><span class="pre">False</span></code> otherwise.
 An example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span> <span class="pre">x:</span> <span class="pre">x</span> <span class="pre">in</span> <span class="pre">[0,</span> <span class="pre">2]</span></code>.</p>
 </p></li>
 <li><p><strong>skipfooter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 0</em>) – Number of lines at bottom of file to skip (Unsupported with <code class="docutils literal notranslate"><span class="pre">engine='c'</span></code>).</p></li>
 <li><p><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Number of rows of file to read. Useful for reading pieces of large files.</p></li>
 <li><p><strong>na_values</strong> (<em>Hashable</em><em>, </em><em>Iterable</em><em> of </em><em>Hashable</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>{Hashable : Iterable}</em><em>, </em><em>optional</em>) – Additional strings to recognize as <code class="docutils literal notranslate"><span class="pre">NA</span></code>/<code class="docutils literal notranslate"><span class="pre">NaN</span></code>. If <code class="docutils literal notranslate"><span class="pre">dict</span></code> passed, specific
 per-column <code class="docutils literal notranslate"><span class="pre">NA</span></code> values.  By default the following values are interpreted as
 <code class="docutils literal notranslate"><span class="pre">NaN</span></code>: “”, “#N/A”, “#N/A N/A”, “#NA”, “-1.#IND”, “-1.#QNAN”, “-NaN”, “-nan”,
 “1.#IND”, “1.#QNAN”, “&lt;NA&gt;”, “N/A”, “NA”, “NULL”, “NaN”, “None”,
 “n/a”, “nan”, “null”.</p></li>
 <li><p><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>Whether or not to include the default <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values when parsing the data.
 Depending on whether <code class="docutils literal notranslate"><span class="pre">na_values</span></code> is passed in, the behavior is as follows:</p>
 <ul>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is <code class="docutils literal notranslate"><span class="pre">True</span></code>, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are specified, <code class="docutils literal notranslate"><span class="pre">na_values</span></code>
 is appended to the default <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is <code class="docutils literal notranslate"><span class="pre">True</span></code>, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are not specified, only
 the default <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values are used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are specified, only
 the <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values specified in <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are not specified, no
 strings will be parsed as <code class="docutils literal notranslate"><span class="pre">NaN</span></code>.</p></li>
 </ul>
 <p>Note that if <code class="docutils literal notranslate"><span class="pre">na_filter</span></code> is passed in as <code class="docutils literal notranslate"><span class="pre">False</span></code>, the <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> and
 <code class="docutils literal notranslate"><span class="pre">na_values</span></code> parameters will be ignored.</p>
 </p></li>
 <li><p><strong>na_filter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Detect missing value markers (empty strings and the value of <code class="docutils literal notranslate"><span class="pre">na_values</span></code>). In
 data without any <code class="docutils literal notranslate"><span class="pre">NA</span></code> values, passing <code class="docutils literal notranslate"><span class="pre">na_filter=False</span></code> can improve the
 performance of reading a large file.</p></li>
 <li><p><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Indicate number of <code class="docutils literal notranslate"><span class="pre">NA</span></code> values placed in non-numeric columns.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.2.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>skip_blank_lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, skip over blank lines rather than interpreting as <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values.</p></li>
 <li><p><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><em>Hashable</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><em>lists</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>{Hashable : list}</em><em>, </em><em>default False</em>) – <p>The behavior is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">bool</span></code>. If <code class="docutils literal notranslate"><span class="pre">True</span></code> -&gt; try parsing the index. Note: Automatically set to
 <code class="docutils literal notranslate"><span class="pre">True</span></code> if <code class="docutils literal notranslate"><span class="pre">date_format</span></code> or <code class="docutils literal notranslate"><span class="pre">date_parser</span></code> arguments have been passed.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">list</span></code> of <code class="docutils literal notranslate"><span class="pre">int</span></code> or names. e.g. If <code class="docutils literal notranslate"><span class="pre">[1,</span> <span class="pre">2,</span> <span class="pre">3]</span></code> -&gt; try parsing columns 1, 2, 3
 each as a separate date column.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">list</span></code> of <code class="docutils literal notranslate"><span class="pre">list</span></code>. e.g.  If <code class="docutils literal notranslate"><span class="pre">[[1,</span> <span class="pre">3]]</span></code> -&gt; combine columns 1 and 3 and parse
 as a single date column. Values are joined with a space before parsing.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">dict</span></code>, e.g. <code class="docutils literal notranslate"><span class="pre">{'foo'</span> <span class="pre">:</span> <span class="pre">[1,</span> <span class="pre">3]}</span></code> -&gt; parse columns 1, 3 as date and call
 result ‘foo’. Values are joined with a space before parsing.</p></li>
 </ul>
 <p>If a column or index cannot be represented as an array of <code class="docutils literal notranslate"><span class="pre">datetime</span></code>,
 say because of an unparsable value or a mixture of timezones, the column
 or index will be returned unaltered as an <code class="docutils literal notranslate"><span class="pre">object</span></code> data type. For
 non-standard <code class="docutils literal notranslate"><span class="pre">datetime</span></code> parsing, use <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.to_datetime.html#pandas.to_datetime" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code></a> after
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.read_csv.html#pandas.read_csv" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a>.</p>
 <p>Note: A fast-path exists for iso8601-formatted dates.</p>
 </p></li>
 <li><p><strong>infer_datetime_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If <code class="docutils literal notranslate"><span class="pre">True</span></code> and <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code> is enabled, pandas will attempt to infer the
 format of the <code class="docutils literal notranslate"><span class="pre">datetime</span></code> strings in the columns, and if it can be inferred,
 switch to a faster method of parsing them. In some cases this can increase
 the parsing speed by 5-10x.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.0.0: </span>A strict version of this argument is now the default, passing it has no effect.</p>
 </div>
 </p></li>
 <li><p><strong>keep_date_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code> and <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code> specifies combining multiple columns then
 keep the original columns.</p></li>
 <li><p><strong>date_parser</strong> (<em>Callable</em><em>, </em><em>optional</em>) – <p>Function to use for converting a sequence of string columns to an array of
 <code class="docutils literal notranslate"><span class="pre">datetime</span></code> instances. The default uses <code class="docutils literal notranslate"><span class="pre">dateutil.parser.parser</span></code> to do the
 conversion. pandas will try to call <code class="docutils literal notranslate"><span class="pre">date_parser</span></code> in three different ways,
 advancing to the next if an exception occurs: 1) Pass one or more arrays
 (as defined by <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>) as arguments; 2) concatenate (row-wise) the
 string values from the columns defined by <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code> into a single array
 and pass that; and 3) call <code class="docutils literal notranslate"><span class="pre">date_parser</span></code> once for each row using one or
 more strings (corresponding to the columns defined by <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>) as
 arguments.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.0.0: </span>Use <code class="docutils literal notranslate"><span class="pre">date_format</span></code> instead, or read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.to_datetime.html#pandas.to_datetime" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code></a> as-needed.</p>
 </div>
 </p></li>
 <li><p><strong>date_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>column -&gt; format</em><em>, </em><em>optional</em>) – <p>Format to use for parsing dates when used in conjunction with <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>.
 The strftime format used to parse time, e.g. <code class="xref py py-const docutils literal notranslate"><span class="pre">&quot;%d/%m/%Y&quot;</span></code>. See
 <a class="reference external" href="https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior">strftime documentation</a> for more information on choices, though
 note that <code class="xref py py-const docutils literal notranslate"><span class="pre">&quot;%f&quot;</span></code> will parse all the way up to nanoseconds.
 You can also pass:</p>
 <ul>
 <li><dl class="simple">
 <dt>“ISO8601”, to parse any <a class="reference external" href="https://en.wikipedia.org/wiki/ISO_8601">ISO8601</a></dt><dd><p>time string (not necessarily in exactly the same format);</p>
 </dd>
 </dl>
 </li>
 <li><dl class="simple">
 <dt>“mixed”, to infer the format for each element individually. This is risky,</dt><dd><p>and you should probably use it along with <cite>dayfirst</cite>.</p>
 </dd>
 </dl>
 </li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>dayfirst</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – DD/MM format dates, international and European format.</p></li>
 <li><p><strong>cache_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, use a cache of unique, converted dates to apply the <code class="docutils literal notranslate"><span class="pre">datetime</span></code>
 conversion. May produce significant speed-up when parsing duplicate
 date strings, especially ones with timezone offsets.</p></li>
 <li><p><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Return <code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> object for iteration or getting chunks with
 <code class="docutils literal notranslate"><span class="pre">get_chunk()</span></code>.</p></li>
 <li><p><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Number of lines to read from the file per chunk. Passing a value will cause the
 function to return a <code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> object for iteration.
 See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking">IO Tools docs</a>
 for more information on <code class="docutils literal notranslate"><span class="pre">iterator</span></code> and <code class="docutils literal notranslate"><span class="pre">chunksize</span></code>.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </p></li>
 <li><p><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character acting as the thousands separator in numerical values.</p></li>
 <li><p><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>default '.'</em>) – Character to recognize as decimal point (e.g., use ‘,’ for European data).</p></li>
 <li><p><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character used to denote a line break. Only valid with C parser.</p></li>
 <li><p><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character used to denote the start and end of a quoted item. Quoted
 items can include the <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and it will be ignored.</p></li>
 <li><p><strong>quoting</strong> (<em>{0</em><em> or </em><em>csv.QUOTE_MINIMAL</em><em>, </em><em>1</em><em> or </em><em>csv.QUOTE_ALL</em><em>, </em><em>2</em><em> or </em><em>csv.QUOTE_NONNUMERIC</em><em>, </em><em>3</em><em> or </em><em>csv.QUOTE_NONE}</em><em>, </em><em>default csv.QUOTE_MINIMAL</em>) – Control field quoting behavior per <code class="docutils literal notranslate"><span class="pre">csv.QUOTE_*</span></code> constants. Default is
 <code class="docutils literal notranslate"><span class="pre">csv.QUOTE_MINIMAL</span></code> (i.e., 0) which implies that only fields containing special
 characters are quoted (e.g., characters defined in <code class="docutils literal notranslate"><span class="pre">quotechar</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code>,
 or <code class="docutils literal notranslate"><span class="pre">lineterminator</span></code>).</p></li>
 <li><p><strong>doublequote</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – When <code class="docutils literal notranslate"><span class="pre">quotechar</span></code> is specified and <code class="docutils literal notranslate"><span class="pre">quoting</span></code> is not <code class="docutils literal notranslate"><span class="pre">QUOTE_NONE</span></code>, indicate
 whether or not to interpret two consecutive <code class="docutils literal notranslate"><span class="pre">quotechar</span></code> elements INSIDE a
 field as a single <code class="docutils literal notranslate"><span class="pre">quotechar</span></code> element.</p></li>
 <li><p><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character used to escape other characters.</p></li>
 <li><p><strong>comment</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character indicating that the remainder of line should not be parsed.
 If found at the beginning
 of a line, the line will be ignored altogether. This parameter must be a
 single character. Like empty lines (as long as <code class="docutils literal notranslate"><span class="pre">skip_blank_lines=True</span></code>),
 fully commented lines are ignored by the parameter <code class="docutils literal notranslate"><span class="pre">header</span></code> but not by
 <code class="docutils literal notranslate"><span class="pre">skiprows</span></code>. For example, if <code class="docutils literal notranslate"><span class="pre">comment='#'</span></code>, parsing
 <code class="docutils literal notranslate"><span class="pre">#empty\na,b,c\n1,2,3</span></code> with <code class="docutils literal notranslate"><span class="pre">header=0</span></code> will result in <code class="docutils literal notranslate"><span class="pre">'a,b,c'</span></code> being
 treated as the header.</p></li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'utf-8'</em>) – Encoding to use for UTF when reading/writing (e.g. <code class="docutils literal notranslate"><span class="pre">'utf-8'</span></code>). <a class="reference external" href="https://docs.python.org/3/library/codecs.html#standard-encodings">List of Python
 standard encodings</a>.</p></li>
 <li><p><strong>encoding_errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'strict'</em>) – <p>How encoding errors are treated. <a class="reference external" href="https://docs.python.org/3/library/codecs.html#error-handlers">List of possible values</a> .</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.3.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>dialect</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/csv.html#csv.Dialect" title="(in Python v3.13)"><em>csv.Dialect</em></a><em>, </em><em>optional</em>) – If provided, this parameter will override values (default or not) for the
 following parameters: <code class="docutils literal notranslate"><span class="pre">delimiter</span></code>, <code class="docutils literal notranslate"><span class="pre">doublequote</span></code>, <code class="docutils literal notranslate"><span class="pre">escapechar</span></code>,
 <code class="docutils literal notranslate"><span class="pre">skipinitialspace</span></code>, <code class="docutils literal notranslate"><span class="pre">quotechar</span></code>, and <code class="docutils literal notranslate"><span class="pre">quoting</span></code>. If it is necessary to
 override values, a <code class="docutils literal notranslate"><span class="pre">ParserWarning</span></code> will be issued. See <code class="docutils literal notranslate"><span class="pre">csv.Dialect</span></code>
 documentation for more details.</p></li>
 <li><p><strong>on_bad_lines</strong> (<em>{'error'</em><em>, </em><em>'warn'</em><em>, </em><em>'skip'}</em><em> or </em><em>Callable</em><em>, </em><em>default 'error'</em>) – <p>Specifies what to do upon encountering a bad line (a line with too many fields).
 Allowed values are:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">'error'</span></code>, raise an Exception when a bad line is encountered.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'warn'</span></code>, raise a warning when a bad line is encountered and skip that line.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'skip'</span></code>, skip bad lines without raising or warning when they are encountered.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.3.0.</span></p>
 </div>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.4.0: </span></p>
 <ul>
 <li><p>Callable, function with signature
 <code class="docutils literal notranslate"><span class="pre">(bad_line:</span> <span class="pre">list[str])</span> <span class="pre">-&gt;</span> <span class="pre">list[str]</span> <span class="pre">|</span> <span class="pre">None</span></code> that will process a single
 bad line. <code class="docutils literal notranslate"><span class="pre">bad_line</span></code> is a list of strings split by the <code class="docutils literal notranslate"><span class="pre">sep</span></code>.
 If the function returns <code class="docutils literal notranslate"><span class="pre">None</span></code>, the bad line will be ignored.
 If the function returns a new <code class="docutils literal notranslate"><span class="pre">list</span></code> of strings with more elements than
 expected, a <code class="docutils literal notranslate"><span class="pre">ParserWarning</span></code> will be emitted while dropping extra elements.
 Only supported when <code class="docutils literal notranslate"><span class="pre">engine='python'</span></code>.</p></li>
 </ul>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 2.2.0: </span></p>
 <ul>
 <li><p>Callable, function with signature
 as described in <a class="reference external" href="https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html#pyarrow.csv.ParseOptions.invalid_row_handler">pyarrow documentation</a> when <code class="docutils literal notranslate"><span class="pre">engine='pyarrow'</span></code>.</p></li>
 </ul>
 </div>
 </p></li>
 <li><p><strong>delim_whitespace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Specifies whether or not whitespace (e.g. <code class="docutils literal notranslate"><span class="pre">'</span> <span class="pre">'</span></code> or <code class="docutils literal notranslate"><span class="pre">'\t'</span></code>) will be
 used as the <code class="docutils literal notranslate"><span class="pre">sep</span></code> delimiter. Equivalent to setting <code class="docutils literal notranslate"><span class="pre">sep='\s+'</span></code>. If this option
 is set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, nothing should be passed in for the <code class="docutils literal notranslate"><span class="pre">delimiter</span></code>
 parameter.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.2.0: </span>Use <code class="docutils literal notranslate"><span class="pre">sep=&quot;\s+&quot;</span></code> instead.</p>
 </div>
 </p></li>
 <li><p><strong>low_memory</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Internally process the file in chunks, resulting in lower memory use
 while parsing, but possibly mixed type inference.  To ensure no mixed
 types either set <code class="docutils literal notranslate"><span class="pre">False</span></code>, or specify the type with the <code class="docutils literal notranslate"><span class="pre">dtype</span></code> parameter.
 Note that the entire file is read into a single <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 regardless, use the <code class="docutils literal notranslate"><span class="pre">chunksize</span></code> or <code class="docutils literal notranslate"><span class="pre">iterator</span></code> parameter to return the data in
 chunks. (Only valid with C parser).</p></li>
 <li><p><strong>memory_map</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – If a filepath is provided for <code class="docutils literal notranslate"><span class="pre">filepath_or_buffer</span></code>, map the file object
 directly onto memory and access the data directly from there. Using this
 option can improve performance because there is no longer any I/O overhead.</p></li>
 <li><p><strong>float_precision</strong> (<em>{'high'</em><em>, </em><em>'legacy'</em><em>, </em><em>'round_trip'}</em><em>, </em><em>optional</em>) – Specifies which converter the C engine should use for floating-point
 values. The options are <code class="docutils literal notranslate"><span class="pre">None</span></code> or <code class="docutils literal notranslate"><span class="pre">'high'</span></code> for the ordinary converter,
 <code class="docutils literal notranslate"><span class="pre">'legacy'</span></code> for the original lower precision pandas converter, and
 <code class="docutils literal notranslate"><span class="pre">'round_trip'</span></code> for the round-trip converter.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples of storage options, refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>A comma-separated values (csv) file is returned as a two-dimensional
 data structure with labeled axes.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or TextFileReader</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>If your files are large and records do not contain quoted newlines, you may
 pass the extra argument <code class="docutils literal notranslate"><span class="pre">splittable=True</span></code> to enable dynamic splitting for
 this read on newlines. Using this option for records that do contain quoted
 newlines may result in partial records and data corruption.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></dt><dd><p>Write DeferredDataFrame to a comma-separated values (csv) file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_table</span></code></dt><dd><p>Read general delimited file into DeferredDataFrame.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_fwf" title="apache_beam.dataframe.io.read_fwf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_fwf</span></code></a></dt><dd><p>Read a table of fixed-width formatted lines into DeferredDataFrame.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;data.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_csv">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">transform_label</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_csv"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_csv" title="Link to this definition"></a></dt>
 <dd><p>Write object to a comma-separated values (csv) file.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><em>None</em><em>, </em><em>default None</em>) – String, path object (implementing os.PathLike[str]), or file-like
 object implementing a write() function. If None, the result is
 returned as a string. If a non-binary file object is passed, it should
 be opened with <cite>newline=''</cite>, disabling universal newlines. If a binary
 file object is passed, <cite>mode</cite> might need to contain a <cite>‘b’</cite>.</p></li>
 <li><p><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – String of length 1. Field delimiter for the output file.</p></li>
 <li><p><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</p></li>
 <li><p><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>Callable</em><em>, </em><em>default None</em>) – Format string for floating point numbers. If a Callable is given, it takes
 precedence over other numeric formatting parameters, like decimal.</p></li>
 <li><p><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em>) – Columns to write.</p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
 assumed to be aliases for the column names.</p></li>
 <li><p><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</p></li>
 <li><p><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><em>sequence</em><em>, or </em><em>False</em><em>, </em><em>default None</em>) – Column label for index column(s) if desired. If None is given, and
 <cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
 sequence should be given if the object uses MultiIndex. If
 False do not print fields for index names. Use index_label=False
 for easier importing in R.</p></li>
 <li><p><strong>mode</strong> (<em>{'w'</em><em>, </em><em>'x'</em><em>, </em><em>'a'}</em><em>, </em><em>default 'w'</em>) – <p>Forwarded to either <cite>open(mode=)</cite> or <cite>fsspec.open(mode=)</cite> to control
 the file opening. Typical values include:</p>
 <ul>
 <li><p>‘w’, truncate the file first.</p></li>
 <li><p>‘x’, exclusive creation, failing if the file already exists.</p></li>
 <li><p>‘a’, append to the end of file if it exists.</p></li>
 </ul>
 </li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – A string representing the encoding to use in the output file,
 defaults to ‘utf-8’. <cite>encoding</cite> is not supported if <cite>path_or_buf</cite>
 is a non-binary file object.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 <p>May be a dict with key ‘method’ as compression mode
 and other entries as additional compression options if
 compression mode is ‘zip’.</p>
 <p>Passing compression options as keys in dict is
 supported for compression modes ‘gzip’, ‘bz2’, ‘zstd’, and ‘zip’.</p>
 </div>
 </li>
 <li><p><strong>quoting</strong> (<em>optional constant from csv module</em>) – Defaults to csv.QUOTE_MINIMAL. If you have set a <cite>float_format</cite>
 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
 will treat them as non-numeric.</p></li>
 <li><p><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '&quot;'</em>) – String of length 1. Character used to quote fields.</p></li>
 <li><p><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – <p>The newline character or character sequence to use in the output
 file. Defaults to <cite>os.linesep</cite>, which depends on the OS in which
 this method is called (e.g. ‘\n’ for Linux, ‘\r\n’ for Windows).</p>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 1.5.0: </span>Previously was line_terminator, changed for consistency with
 read_csv and the standard library ‘csv’ module.</p>
 </div>
 </li>
 <li><p><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> or </em><em>None</em>) – Rows to write at a time.</p></li>
 <li><p><strong>date_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for datetime objects.</p></li>
 <li><p><strong>doublequote</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Control quoting of <cite>quotechar</cite> inside a field.</p></li>
 <li><p><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – String of length 1. Character used to escape <cite>sep</cite> and <cite>quotechar</cite>
 when appropriate.</p></li>
 <li><p><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator. E.g. use ‘,’ for
 European data.</p></li>
 <li><p><strong>errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default 'strict'</em>) – Specifies how encoding and decoding errors are to be handled.
 See the errors argument for <a class="reference external" href="https://docs.python.org/3/library/functions.html#open" title="(in Python v3.13)"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> for a full list
 of options.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples of storage options, refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>If path_or_buf is None, returns the resulting csv format as a
 string. Otherwise returns None.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p>None or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a></p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_csv</span></code></a></dt><dd><p>Load a CSV file into a DeferredDataFrame.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.to_excel" title="apache_beam.dataframe.io.to_excel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_excel</span></code></a></dt><dd><p>Write DeferredDataFrame to an Excel file.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create &#39;out.csv&#39; containing &#39;df&#39; without indices</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Raphael&#39;</span><span class="p">,</span> <span class="s1">&#39;Donatello&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;mask&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;red&#39;</span><span class="p">,</span> <span class="s1">&#39;purple&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;weapon&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sai&#39;</span><span class="p">,</span> <span class="s1">&#39;bo staff&#39;</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;out.csv&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>

 <span class="go">Create &#39;out.zip&#39; containing &#39;out.csv&#39;</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
 <span class="go">&#39;name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">compression_opts</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">&#39;zip&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                        <span class="n">archive_name</span><span class="o">=</span><span class="s1">&#39;out.csv&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;out.zip&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
 <span class="gp">... </span>          <span class="n">compression</span><span class="o">=</span><span class="n">compression_opts</span><span class="p">)</span>

 <span class="go">To write a csv file to a new folder or nested folder you will first</span>
 <span class="go">need to create it using either Pathlib or os:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">filepath</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="s1">&#39;folder/subfolder/out.csv&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">filepath</span><span class="o">.</span><span class="n">parent</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">parents</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">filepath</span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="s1">&#39;folder/subfolder&#39;</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;folder/subfolder/out.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_fwf">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_fwf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_fwf"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_fwf" title="Link to this definition"></a></dt>
 <dd><p>Read a table of fixed-width formatted lines into DeferredDataFrame.</p>
 <p>Also supports optionally iterating over the file or breaking it
 into chunks.</p>
 <p>Additional help can be found in the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html">online docs for IO Tools</a>.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a text <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.csv</span></code>.</p></li>
 <li><p><strong>colspecs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.13)"><em>tuple</em></a><em> (</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>) or </em><em>'infer'</em><em>, </em><em>optional</em>) – A list of tuples giving the extents of the fixed-width
 fields of each line as half-open intervals (i.e.,  [from, to[ ).
 String value ‘infer’ can be used to instruct the parser to try
 detecting the column specifications from the first 100 rows of
 the data which are not being skipped via skiprows (default=‘infer’).</p></li>
 <li><p><strong>widths</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – A list of field widths which can be used instead of ‘colspecs’ if
 the intervals are contiguous.</p></li>
 <li><p><strong>infer_nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 100</em>) – The number of rows to consider when letting the parser determine the
 <cite>colspecs</cite>.</p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </li>
 <li><p><strong>**kwds</strong> (<em>optional</em>) – Optional keyword arguments can be passed to <code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code>.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>A comma-separated values (csv) file is returned as a two-dimensional
 data structure with labeled axes.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or TextFileReader</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></dt><dd><p>Write DeferredDataFrame to a comma-separated values (csv) file.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_csv</span></code></a></dt><dd><p>Read a comma-separated values (csv) file into DeferredDataFrame.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_fwf</span><span class="p">(</span><span class="s1">&#39;data.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_json">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_json" title="Link to this definition"></a></dt>
 <dd><p>Convert a JSON string to pandas object.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path_or_buf</strong> (<em>a valid JSON str</em><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.json</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any
 <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.1.0: </span>Passing json literal strings is deprecated.</p>
 </div>
 </p></li>
 <li><p><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – <p>Indication of expected JSON string format.
 Compatible JSON strings can be produced by <code class="docutils literal notranslate"><span class="pre">to_json()</span></code> with a
 corresponding orient value.
 The set of possible orients is:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">'split'</span></code> : dict like
 <code class="docutils literal notranslate"><span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">[index],</span> <span class="pre">columns</span> <span class="pre">-&gt;</span> <span class="pre">[columns],</span> <span class="pre">data</span> <span class="pre">-&gt;</span> <span class="pre">[values]}</span></code></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'records'</span></code> : list like
 <code class="docutils literal notranslate"><span class="pre">[{column</span> <span class="pre">-&gt;</span> <span class="pre">value},</span> <span class="pre">...</span> <span class="pre">,</span> <span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">value}]</span></code></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'index'</span></code> : dict like <code class="docutils literal notranslate"><span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">value}}</span></code></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'columns'</span></code> : dict like <code class="docutils literal notranslate"><span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">value}}</span></code></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'values'</span></code> : just the values array</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">'table'</span></code> : dict like <code class="docutils literal notranslate"><span class="pre">{'schema':</span> <span class="pre">{schema},</span> <span class="pre">'data':</span> <span class="pre">{data}}</span></code></p></li>
 </ul>
 <p>The allowed and default values depend on the value
 of the <cite>typ</cite> parameter.</p>
 <ul>
 <li><p>when <code class="docutils literal notranslate"><span class="pre">typ</span> <span class="pre">==</span> <span class="pre">'series'</span></code>,</p>
 <ul>
 <li><p>allowed orients are <code class="docutils literal notranslate"><span class="pre">{'split','records','index'}</span></code></p></li>
 <li><p>default is <code class="docutils literal notranslate"><span class="pre">'index'</span></code></p></li>
 <li><p>The DeferredSeries index must be unique for orient <code class="docutils literal notranslate"><span class="pre">'index'</span></code>.</p></li>
 </ul>
 </li>
 <li><p>when <code class="docutils literal notranslate"><span class="pre">typ</span> <span class="pre">==</span> <span class="pre">'frame'</span></code>,</p>
 <ul>
 <li><p>allowed orients are <code class="docutils literal notranslate"><span class="pre">{'split','records','index',</span>
 <span class="pre">'columns','values',</span> <span class="pre">'table'}</span></code></p></li>
 <li><p>default is <code class="docutils literal notranslate"><span class="pre">'columns'</span></code></p></li>
 <li><p>The DeferredDataFrame index must be unique for orients <code class="docutils literal notranslate"><span class="pre">'index'</span></code> and
 <code class="docutils literal notranslate"><span class="pre">'columns'</span></code>.</p></li>
 <li><p>The DeferredDataFrame columns must be unique for orients <code class="docutils literal notranslate"><span class="pre">'index'</span></code>,
 <code class="docutils literal notranslate"><span class="pre">'columns'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'records'</span></code>.</p></li>
 </ul>
 </li>
 </ul>
 </p></li>
 <li><p><strong>typ</strong> (<em>{'frame'</em><em>, </em><em>'series'}</em><em>, </em><em>default 'frame'</em>) – The type of object to recover.</p></li>
 <li><p><strong>dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default None</em>) – <p>If True, infer dtypes; if a dict of column to dtype, then use those;
 if False, then don’t infer dtypes at all, applies only to the data.</p>
 <p>For all <code class="docutils literal notranslate"><span class="pre">orient</span></code> values except <code class="docutils literal notranslate"><span class="pre">'table'</span></code>, default is True.</p>
 </p></li>
 <li><p><strong>convert_axes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default None</em>) – <p>Try to convert the axes to the proper dtypes.</p>
 <p>For all <code class="docutils literal notranslate"><span class="pre">orient</span></code> values except <code class="docutils literal notranslate"><span class="pre">'table'</span></code>, default is True.</p>
 </p></li>
 <li><p><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default True</em>) – If True then default datelike columns may be converted (depending on
 keep_default_dates).
 If False, no dates will be converted.
 If a list of column names, then those columns will be converted and
 default datelike columns may also be converted (depending on
 keep_default_dates).</p></li>
 <li><p><strong>keep_default_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>If parsing dates (convert_dates is not False), then try to parse the
 default datelike columns.
 A column label is datelike if</p>
 <ul>
 <li><p>it ends with <code class="docutils literal notranslate"><span class="pre">'_at'</span></code>,</p></li>
 <li><p>it ends with <code class="docutils literal notranslate"><span class="pre">'_time'</span></code>,</p></li>
 <li><p>it begins with <code class="docutils literal notranslate"><span class="pre">'timestamp'</span></code>,</p></li>
 <li><p>it is <code class="docutils literal notranslate"><span class="pre">'modified'</span></code>, or</p></li>
 <li><p>it is <code class="docutils literal notranslate"><span class="pre">'date'</span></code>.</p></li>
 </ul>
 </p></li>
 <li><p><strong>precise_float</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Set to enable usage of higher precision (strtod) function when
 decoding string to double values. Default (False) is to use fast but
 less precise builtin functionality.</p></li>
 <li><p><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – The timestamp unit to detect if converting dates. The default behaviour
 is to try and detect the correct precision, but if this is not desired
 then pass one of ‘s’, ‘ms’, ‘us’ or ‘ns’ to force parsing only seconds,
 milliseconds, microseconds or nanoseconds respectively.</p></li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default is 'utf-8'</em>) – The encoding to use to decode py3 bytes.</p></li>
 <li><p><strong>encoding_errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default &quot;strict&quot;</em>) – <p>How encoding errors are treated. <a class="reference external" href="https://docs.python.org/3/library/codecs.html#error-handlers">List of possible values</a> .</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.3.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Read the file as a json object per line.</p></li>
 <li><p><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Return JsonReader object for iteration.
 See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#line-delimited-json">line-delimited json docs</a>
 for more information on <code class="docutils literal notranslate"><span class="pre">chunksize</span></code>.
 This can only be passed if <cite>lines=True</cite>.
 If this is None, the file will be read into memory all at once.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </p></li>
 <li><p><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – The number of lines to read from the line-delimited JSON file.
 This can only be passed if <cite>lines=True</cite>.
 If this is None, all the rows will be returned.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 </p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>engine</strong> (<em>{&quot;ujson&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>default &quot;ujson&quot;</em>) – <p>Parser engine to use. The <code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code> engine is only available when
 <code class="docutils literal notranslate"><span class="pre">lines=True</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>A JsonReader is returned when <code class="docutils literal notranslate"><span class="pre">chunksize</span></code> is not <code class="docutils literal notranslate"><span class="pre">0</span></code> or <code class="docutils literal notranslate"><span class="pre">None</span></code>.
 Otherwise, the type returned depends on the value of <code class="docutils literal notranslate"><span class="pre">typ</span></code>.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a>, <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a>, or pandas.api.typing.JsonReader</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_json</span></code></dt><dd><p>Convert a DeferredDataFrame to a JSON string.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.to_json</span></code></dt><dd><p>Convert a DeferredSeries to a JSON string.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">json_normalize</span></code></dt><dd><p>Normalize semi-structured JSON data into a flat table.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Specific to <code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, if a <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> with a literal
 <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name of <cite>index</cite> gets written with <a class="reference internal" href="#apache_beam.dataframe.io.to_json" title="apache_beam.dataframe.io.to_json"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_json()</span></code></a>, the
 subsequent read operation will incorrectly set the <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name to
 <code class="docutils literal notranslate"><span class="pre">None</span></code>. This is because <cite>index</cite> is also used by <code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_json()</span></code>
 to denote a missing <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name, and the subsequent
 <a class="reference internal" href="#apache_beam.dataframe.io.read_json" title="apache_beam.dataframe.io.read_json"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_json()</span></code></a> operation cannot distinguish between the two. The same
 limitation is encountered with a <code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code> and any names
 beginning with <code class="docutils literal notranslate"><span class="pre">'level_'</span></code>.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">io</span><span class="w"> </span><span class="kn">import</span> <span class="n">StringIO</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
 <span class="gp">... </span>                  <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;split&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;split&#39;</span><span class="p">)</span>
 <span class="go">    &#39;{&quot;columns&quot;:[&quot;col 1&quot;,&quot;col 2&quot;],&quot;index&quot;:[&quot;row 1&quot;,&quot;row 2&quot;],&quot;data&quot;:[[&quot;a&quot;,&quot;b&quot;],[&quot;c&quot;,&quot;d&quot;]]}&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="n">_</span><span class="p">),</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;split&#39;</span><span class="p">)</span>
 <span class="go">      col 1 col 2</span>
 <span class="go">row 1     a     b</span>
 <span class="go">row 2     c     d</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;index&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
 <span class="go">&#39;{&quot;row 1&quot;:{&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},&quot;row 2&quot;:{&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}}&#39;</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="n">_</span><span class="p">),</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
 <span class="go">      col 1 col 2</span>
 <span class="go">row 1     a     b</span>
 <span class="go">row 2     c     d</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;records&#39;`` formatted JSON.</span>
 <span class="go">Note that index labels are not preserved with this encoding.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;records&#39;</span><span class="p">)</span>
 <span class="go">&#39;[{&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},{&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}]&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="n">_</span><span class="p">),</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;records&#39;</span><span class="p">)</span>
 <span class="go">  col 1 col 2</span>
 <span class="go">0     a     b</span>
 <span class="go">1     c     d</span>

 <span class="go">Encoding with Table Schema</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;table&#39;</span><span class="p">)</span>
 <span class="go">    &#39;{&quot;schema&quot;:{&quot;fields&quot;:[{&quot;name&quot;:&quot;index&quot;,&quot;type&quot;:&quot;string&quot;},{&quot;name&quot;:&quot;col 1&quot;,&quot;type&quot;:&quot;string&quot;},{&quot;name&quot;:&quot;col 2&quot;,&quot;type&quot;:&quot;string&quot;}],&quot;primaryKey&quot;:[&quot;index&quot;],&quot;pandas_version&quot;:&quot;1.4.0&quot;},&quot;data&quot;:[{&quot;index&quot;:&quot;row 1&quot;,&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},{&quot;index&quot;:&quot;row 2&quot;,&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}]}&#39;</span>

 <span class="go">The following example uses ``dtype_backend=&quot;numpy_nullable&quot;``</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="s1">&#39;&#39;&#39;{&quot;index&quot;: {&quot;0&quot;: 0, &quot;1&quot;: 1},</span>
 <span class="gp">... </span><span class="s1">       &quot;a&quot;: {&quot;0&quot;: 1, &quot;1&quot;: null},</span>
 <span class="gp">... </span><span class="s1">       &quot;b&quot;: {&quot;0&quot;: 2.5, &quot;1&quot;: 4.5},</span>
 <span class="gp">... </span><span class="s1">       &quot;c&quot;: {&quot;0&quot;: true, &quot;1&quot;: false},</span>
 <span class="gp">... </span><span class="s1">       &quot;d&quot;: {&quot;0&quot;: &quot;a&quot;, &quot;1&quot;: &quot;b&quot;},</span>
 <span class="gp">... </span><span class="s1">       &quot;e&quot;: {&quot;0&quot;: 1577.2, &quot;1&quot;: 1577.1}}&#39;&#39;&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="n">data</span><span class="p">),</span> <span class="n">dtype_backend</span><span class="o">=</span><span class="s2">&quot;numpy_nullable&quot;</span><span class="p">)</span>
 <span class="go">   index     a    b      c  d       e</span>
 <span class="go">0      0     1  2.5   True  a  1577.2</span>
 <span class="go">1      1  &lt;NA&gt;  4.5  False  b  1577.1</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_json">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">orient</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_json" title="Link to this definition"></a></dt>
 <dd><p>Convert the object to a JSON string.</p>
 <p>Note that NaN and None values will be converted to null, and datetime objects
 will be converted to UNIX timestamps.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><em>None</em><em>, </em><em>default None</em>) – String, path object (implementing os.PathLike[str]), or file-like
 object implementing a write() function. If None, the result is
 returned as a string.</p></li>
 <li><p><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – <p>Indication of expected JSON string format.</p>
 <ul>
 <li><p>DeferredSeries:</p>
 <blockquote>
 <div><ul class="simple">
 <li><p>default is ‘index’</p></li>
 <li><p>allowed values are: {‘split’, ‘records’, ‘index’, ‘table’}.</p></li>
 </ul>
 </div></blockquote>
 </li>
 <li><p>DeferredDataFrame:</p>
 <blockquote>
 <div><ul class="simple">
 <li><p>default is ‘columns’</p></li>
 <li><p>allowed values are: {‘split’, ‘records’, ‘index’, ‘columns’,
 ‘values’, ‘table’}.</p></li>
 </ul>
 </div></blockquote>
 </li>
 <li><p>The format of the JSON string:</p>
 <blockquote>
 <div><ul class="simple">
 <li><p>‘split’ : dict like {‘index’ -&gt; [index], ‘columns’ -&gt; [columns],
 ‘data’ -&gt; [values]}</p></li>
 <li><p>‘records’ : list like [{column -&gt; value}, … , {column -&gt; value}]</p></li>
 <li><p>‘index’ : dict like {index -&gt; {column -&gt; value}}</p></li>
 <li><p>‘columns’ : dict like {column -&gt; {index -&gt; value}}</p></li>
 <li><p>‘values’ : just the values array</p></li>
 <li><p>‘table’ : dict like {‘schema’: {schema}, ‘data’: {data}}</p></li>
 </ul>
 <p>Describing the data, where data component is like <code class="docutils literal notranslate"><span class="pre">orient='records'</span></code>.</p>
 </div></blockquote>
 </li>
 </ul>
 </p></li>
 <li><p><strong>date_format</strong> (<em>{None</em><em>, </em><em>'epoch'</em><em>, </em><em>'iso'}</em>) – Type of date conversion. ‘epoch’ = epoch milliseconds,
 ‘iso’ = ISO8601. The default depends on the <cite>orient</cite>. For
 <code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, the default is ‘iso’. For all other orients,
 the default is ‘epoch’.</p></li>
 <li><p><strong>double_precision</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 10</em>) – The number of decimal places to use when encoding
 floating point values. The possible maximal value is 15.
 Passing double_precision greater than 15 will raise a ValueError.</p></li>
 <li><p><strong>force_ascii</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Force encoded string to be ASCII.</p></li>
 <li><p><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default 'ms'</em><em> (</em><em>milliseconds</em><em>)</em>) – The time unit to encode to, governs timestamp and ISO8601
 precision.  One of ‘s’, ‘ms’, ‘us’, ‘ns’ for second, millisecond,
 microsecond, and nanosecond respectively.</p></li>
 <li><p><strong>default_handler</strong> (<em>callable</em><em>, </em><em>default None</em>) – Handler to call if object cannot otherwise be converted to a
 suitable format for JSON. Should receive a single argument which is
 the object to convert and return a serialisable object.</p></li>
 <li><p><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – If ‘orient’ is ‘records’ write out line-delimited json format. Will
 throw ValueError if incorrect ‘orient’ since others are not
 list-like.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </p></li>
 <li><p><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em> or </em><em>None</em><em>, </em><em>default None</em>) – The index is only used when ‘orient’ is ‘split’, ‘index’, ‘columns’,
 or ‘table’. Of these, ‘index’ and ‘columns’ do not support
 <cite>index=False</cite>.</p></li>
 <li><p><strong>indent</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Length of whitespace used to indent each record.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details; for more examples of storage options, refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 </p></li>
 <li><p><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default 'w'</em><em> (</em><em>writing</em><em>)</em>) – Specify the IO mode for output when supplying a path_or_buf.
 Accepted args are ‘w’ (writing) and ‘a’ (append) only.
 mode=’a’ is only supported when lines is True and orient is ‘records’.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>If path_or_buf is None, returns the resulting json format as a
 string. Otherwise returns None.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p>None or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a></p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_json" title="apache_beam.dataframe.io.read_json"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_json</span></code></a></dt><dd><p>Convert a JSON string to pandas object.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>The behavior of <code class="docutils literal notranslate"><span class="pre">indent=0</span></code> varies from the stdlib, which does not
 indent the output but does insert newlines. Currently, <code class="docutils literal notranslate"><span class="pre">indent=0</span></code>
 and the default <code class="docutils literal notranslate"><span class="pre">indent=None</span></code> are equivalent in pandas, though this
 may change in a future release.</p>
 <p><code class="docutils literal notranslate"><span class="pre">orient='table'</span></code> contains a ‘pandas_version’ field under ‘schema’.
 This stores the version of <cite>pandas</cite> used in the latest revision of the
 schema.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">json</span><span class="w"> </span><span class="kn">import</span> <span class="n">loads</span><span class="p">,</span> <span class="n">dumps</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
 <span class="gp">... </span>    <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">]],</span>
 <span class="gp">... </span>    <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;row 1&quot;</span><span class="p">,</span> <span class="s2">&quot;row 2&quot;</span><span class="p">],</span>
 <span class="gp">... </span>    <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col 1&quot;</span><span class="p">,</span> <span class="s2">&quot;col 2&quot;</span><span class="p">],</span>
 <span class="gp">... </span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;split&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;columns&quot;: [</span>
 <span class="go">        &quot;col 1&quot;,</span>
 <span class="go">        &quot;col 2&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    &quot;index&quot;: [</span>
 <span class="go">        &quot;row 1&quot;,</span>
 <span class="go">        &quot;row 2&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    &quot;data&quot;: [</span>
 <span class="go">        [</span>
 <span class="go">            &quot;a&quot;,</span>
 <span class="go">            &quot;b&quot;</span>
 <span class="go">        ],</span>
 <span class="go">        [</span>
 <span class="go">            &quot;c&quot;,</span>
 <span class="go">            &quot;d&quot;</span>
 <span class="go">        ]</span>
 <span class="go">    ]</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a DataFrame using &#39;records&#39; formatted JSON.</span>
 <span class="go">Note that index labels are not preserved with this encoding.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;records&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">[</span>
 <span class="go">    {</span>
 <span class="go">        &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">    },</span>
 <span class="go">    {</span>
 <span class="go">        &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">]</span>

 <span class="go">Encoding/decoding a DataFrame using &#39;index&#39; formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;index&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;row 1&quot;: {</span>
 <span class="go">        &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;row 2&quot;: {</span>
 <span class="go">        &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a DataFrame using &#39;columns&#39; formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;col 1&quot;: {</span>
 <span class="go">        &quot;row 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;row 2&quot;: &quot;c&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;col 2&quot;: {</span>
 <span class="go">        &quot;row 1&quot;: &quot;b&quot;,</span>
 <span class="go">        &quot;row 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a DataFrame using &#39;values&#39; formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;values&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">[</span>
 <span class="go">    [</span>
 <span class="go">        &quot;a&quot;,</span>
 <span class="go">        &quot;b&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    [</span>
 <span class="go">        &quot;c&quot;,</span>
 <span class="go">        &quot;d&quot;</span>
 <span class="go">    ]</span>
 <span class="go">]</span>

 <span class="go">Encoding with Table Schema:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;table&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;schema&quot;: {</span>
 <span class="go">        &quot;fields&quot;: [</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;index&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            },</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;col 1&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            },</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;col 2&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            }</span>
 <span class="go">        ],</span>
 <span class="go">        &quot;primaryKey&quot;: [</span>
 <span class="go">            &quot;index&quot;</span>
 <span class="go">        ],</span>
 <span class="go">        &quot;pandas_version&quot;: &quot;1.4.0&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;data&quot;: [</span>
 <span class="go">        {</span>
 <span class="go">            &quot;index&quot;: &quot;row 1&quot;,</span>
 <span class="go">            &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">            &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">        },</span>
 <span class="go">        {</span>
 <span class="go">            &quot;index&quot;: &quot;row 2&quot;,</span>
 <span class="go">            &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">            &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">        }</span>
 <span class="go">    ]</span>
 <span class="go">}</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_html">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_html</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_html"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_html" title="Link to this definition"></a></dt>
 <dd><p>Read HTML tables into a <code class="docutils literal notranslate"><span class="pre">list</span></code> of <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> objects.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>io</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – <p>String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a string <code class="docutils literal notranslate"><span class="pre">read()</span></code> function.
 The string can represent a URL or the HTML itself. Note that
 lxml only accepts the http, ftp and file url protocols. If you have a
 URL that starts with <code class="docutils literal notranslate"><span class="pre">'https'</span></code> you might try removing the <code class="docutils literal notranslate"><span class="pre">'s'</span></code>.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.1.0: </span>Passing html literal strings is deprecated.
 Wrap literal string/bytes input in <code class="docutils literal notranslate"><span class="pre">io.StringIO</span></code>/<code class="docutils literal notranslate"><span class="pre">io.BytesIO</span></code> instead.</p>
 </div>
 </p></li>
 <li><p><strong>match</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><em>compiled regular expression</em><em>, </em><em>optional</em>) – The set of tables containing text matching this regex or string will be
 returned. Unless the HTML is extremely simple you will probably need to
 pass a non-empty string here. Defaults to ‘.+’ (match any non-empty
 string). The default value will return all tables contained on a page.
 This value is converted to a regular expression so that there is
 consistent behavior between Beautiful Soup and lxml.</p></li>
 <li><p><strong>flavor</strong> (<em>{&quot;lxml&quot;</em><em>, </em><em>&quot;html5lib&quot;</em><em>, </em><em>&quot;bs4&quot;}</em><em> or </em><em>list-like</em><em>, </em><em>optional</em>) – The parsing engine (or list of parsing engines) to use. ‘bs4’ and
 ‘html5lib’ are synonymous with each other, they are both there for
 backwards compatibility. The default of <code class="docutils literal notranslate"><span class="pre">None</span></code> tries to use <code class="docutils literal notranslate"><span class="pre">lxml</span></code>
 to parse and if that fails it falls back on <code class="docutils literal notranslate"><span class="pre">bs4</span></code> + <code class="docutils literal notranslate"><span class="pre">html5lib</span></code>.</p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> or </em><em>list-like</em><em>, </em><em>optional</em>) – The row (or list of rows for a <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.MultiIndex.html#pandas.MultiIndex" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code></a>) to use to
 make the columns headers.</p></li>
 <li><p><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> or </em><em>list-like</em><em>, </em><em>optional</em>) – The column (or list of columns) to use to create the index.</p></li>
 <li><p><strong>skiprows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>list-like</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#slice" title="(in Python v3.13)"><em>slice</em></a><em>, </em><em>optional</em>) – Number of rows to skip after parsing the column integer. 0-based. If a
 sequence of integers or a slice is given, will skip the rows indexed by
 that sequence.  Note that a single element sequence means ‘skip the nth
 row’ whereas an integer means ‘skip n rows’.</p></li>
 <li><p><strong>attrs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>This is a dictionary of attributes that you can pass to use to identify
 the table in the HTML. These are not checked for validity before being
 passed to lxml or Beautiful Soup. However, these attributes must be
 valid HTML table attributes to work correctly. For example,</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">attrs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;id&#39;</span><span class="p">:</span> <span class="s1">&#39;table&#39;</span><span class="p">}</span>
 </pre></div>
 </div>
 <p>is a valid attribute dictionary because the ‘id’ HTML tag attribute is
 a valid HTML attribute for <em>any</em> HTML tag as per <a class="reference external" href="https://html.spec.whatwg.org/multipage/dom.html#global-attributes">this document</a>.</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">attrs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;asdf&#39;</span><span class="p">:</span> <span class="s1">&#39;table&#39;</span><span class="p">}</span>
 </pre></div>
 </div>
 <p>is <em>not</em> a valid attribute dictionary because ‘asdf’ is not a valid
 HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
 table attributes can be found <a class="reference external" href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2">here</a>. A
 working draft of the HTML 5 spec can be found <a class="reference external" href="https://html.spec.whatwg.org/multipage/tables.html">here</a>. It contains the
 latest information on table attributes for the modern web.</p>
 </p></li>
 <li><p><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>optional</em>) – See <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a> for more details.</p></li>
 <li><p><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Separator to use to parse thousands. Defaults to <code class="docutils literal notranslate"><span class="pre">','</span></code>.</p></li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – The encoding used to decode the web page. Defaults to <code class="docutils literal notranslate"><span class="pre">None</span></code>. <code class="docutils literal notranslate"><span class="pre">None</span></code>
 preserves the previous encoding behavior, which depends on the
 underlying parser library (e.g., the parser library will try to use
 the encoding provided by the document).</p></li>
 <li><p><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character to recognize as decimal point (e.g. use ‘,’ for European
 data).</p></li>
 <li><p><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default None</em>) – Dict of functions for converting values in certain columns. Keys can
 either be integers or column labels, values are functions that take one
 input argument, the cell (not column) content, and return the
 transformed content.</p></li>
 <li><p><strong>na_values</strong> (<em>iterable</em><em>, </em><em>default None</em>) – Custom NA values.</p></li>
 <li><p><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – If na_values are specified and keep_default_na is False the default NaN
 values are overridden, otherwise they’re appended to.</p></li>
 <li><p><strong>displayed_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to parse only displayed elements. When True, elements with “display: none” are skipped.</p></li>
 <li><p><strong>extract_links</strong> (<em>{None</em><em>, </em><em>&quot;all&quot;</em><em>, </em><em>&quot;header&quot;</em><em>, </em><em>&quot;body&quot;</em><em>, </em><em>&quot;footer&quot;}</em>) – <p>Table elements in the specified section(s) with &lt;a&gt; tags will have their
 href extracted.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details; for more examples of storage options, refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.1.0.</span></p>
 </div>
 </p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>A list of DeferredDataFrames.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p>dfs</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_csv</span></code></a></dt><dd><p>Read a comma-separated values (csv) file into DeferredDataFrame.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Before using this function you should read the <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-html-gotchas" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><span class="xref std std-ref">gotchas about the
 HTML parsing libraries</span></a>.</p>
 <p>Expect to do some cleanup after you call this function. For example, you
 might need to manually assign column names if the column names are
 converted to NaN when you pass the <cite>header=0</cite> argument. We try to assume as
 little as possible about the structure of the table and push the
 idiosyncrasies of the HTML contained in the table to the user.</p>
 <p>This function searches for <code class="docutils literal notranslate"><span class="pre">&lt;table&gt;</span></code> elements and only for <code class="docutils literal notranslate"><span class="pre">&lt;tr&gt;</span></code>
 and <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code> rows and <code class="docutils literal notranslate"><span class="pre">&lt;td&gt;</span></code> elements within each <code class="docutils literal notranslate"><span class="pre">&lt;tr&gt;</span></code> or <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code>
 element in the table. <code class="docutils literal notranslate"><span class="pre">&lt;td&gt;</span></code> stands for “table data”. This function
 attempts to properly handle <code class="docutils literal notranslate"><span class="pre">colspan</span></code> and <code class="docutils literal notranslate"><span class="pre">rowspan</span></code> attributes.
 If the table has a <code class="docutils literal notranslate"><span class="pre">&lt;thead&gt;</span></code> element, it is used to construct
 the header, otherwise the function attempts to find the header within
 the body (by putting rows with only <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code> elements into the header).</p>
 <p>Similar to <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a> the <cite>header</cite> argument is applied
 <strong>after</strong> <cite>skiprows</cite> is applied.</p>
 <p>This function will <em>always</em> return a list of <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> <em>or</em>
 it will fail, i.e., it will <em>not</em> return an empty list.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">See the read_html documentation in the IO section of the pandas user guide</span>
 <span class="go">for some examples of reading in HTML tables.</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_html">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_html</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_html"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_html" title="Link to this definition"></a></dt>
 <dd><p>Render a DataFrame as an HTML table.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>Path</em><em> or </em><em>StringIO-like</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Buffer to write to. If None, the output is returned as a string.</p></li>
 <li><p><strong>columns</strong> (<em>array-like</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – The subset of columns to write. Writes all columns by default.</p></li>
 <li><p><strong>col_space</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – The minimum width of each column in CSS length units.  An int is assumed to be px units.</p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>optional</em>) – Whether to print column labels, default True.</p></li>
 <li><p><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Whether to print index (row) labels.</p></li>
 <li><p><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'NaN'</em>) – String representation of <code class="docutils literal notranslate"><span class="pre">NaN</span></code> to use.</p></li>
 <li><p><strong>formatters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.13)"><em>tuple</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>one-param. functions</em><em>, </em><em>optional</em>) – Formatter functions to apply to columns’ elements by position or
 name.
 The result of each function must be a unicode string.
 List/tuple must be of length equal to the number of columns.</p></li>
 <li><p><strong>float_format</strong> (<em>one-parameter function</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Formatter function to apply to columns’ elements if they are
 floats. This function must return a unicode string and will be
 applied only to the non-<code class="docutils literal notranslate"><span class="pre">NaN</span></code> elements, with <code class="docutils literal notranslate"><span class="pre">NaN</span></code> being
 handled by <code class="docutils literal notranslate"><span class="pre">na_rep</span></code>.</p></li>
 <li><p><strong>sparsify</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Set to False for a DeferredDataFrame with a hierarchical index to print
 every multiindex key at each row.</p></li>
 <li><p><strong>index_names</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Prints the names of the indexes.</p></li>
 <li><p><strong>justify</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – <p>How to justify the column labels. If None uses the option from
 the print configuration (controlled by set_option), ‘right’ out
 of the box. Valid values are</p>
 <ul>
 <li><p>left</p></li>
 <li><p>right</p></li>
 <li><p>center</p></li>
 <li><p>justify</p></li>
 <li><p>justify-all</p></li>
 <li><p>start</p></li>
 <li><p>end</p></li>
 <li><p>inherit</p></li>
 <li><p>match-parent</p></li>
 <li><p>initial</p></li>
 <li><p>unset.</p></li>
 </ul>
 </p></li>
 <li><p><strong>max_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of rows to display in the console.</p></li>
 <li><p><strong>max_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of columns to display in the console.</p></li>
 <li><p><strong>show_dimensions</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Display DeferredDataFrame dimensions (number of rows by number of columns).</p></li>
 <li><p><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator, e.g. ‘,’ in Europe.</p></li>
 <li><p><strong>bold_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Make the row labels bold in the output.</p></li>
 <li><p><strong>classes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.13)"><em>tuple</em></a><em>, </em><em>default None</em>) – CSS class(es) to apply to the resulting html table.</p></li>
 <li><p><strong>escape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert the characters &lt;, &gt;, and &amp; to HTML-safe sequences.</p></li>
 <li><p><strong>notebook</strong> (<em>{True</em><em>, </em><em>False}</em><em>, </em><em>default False</em>) – Whether the generated HTML is for IPython Notebook.</p></li>
 <li><p><strong>border</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – A <code class="docutils literal notranslate"><span class="pre">border=border</span></code> attribute is included in the opening
 <cite>&lt;table&gt;</cite> tag. Default <code class="docutils literal notranslate"><span class="pre">pd.options.display.html.border</span></code>.</p></li>
 <li><p><strong>table_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – A css id is included in the opening <cite>&lt;table&gt;</cite> tag if specified.</p></li>
 <li><p><strong>render_links</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Convert URLs to HTML links.</p></li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default &quot;utf-8&quot;</em>) – Set character encoding.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>If buf is None, returns the result as a string. Otherwise returns
 None.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a> or None</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_string</span></code></dt><dd><p>Convert DeferredDataFrame to a string.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">html_string</span> <span class="o">=</span> <span class="s1">&#39;&#39;&#39;&lt;table border=&quot;1&quot; class=&quot;dataframe&quot;&gt;</span>
 <span class="gp">... </span><span class="s1">  &lt;thead&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;tr style=&quot;text-align: right;&quot;&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;th&gt;&lt;/th&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;th&gt;col1&lt;/th&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;th&gt;col2&lt;/th&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;/tr&gt;</span>
 <span class="gp">... </span><span class="s1">  &lt;/thead&gt;</span>
 <span class="gp">... </span><span class="s1">  &lt;tbody&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;tr&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;th&gt;0&lt;/th&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;td&gt;1&lt;/td&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;td&gt;4&lt;/td&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;/tr&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;tr&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;th&gt;1&lt;/th&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;td&gt;2&lt;/td&gt;</span>
 <span class="gp">... </span><span class="s1">      &lt;td&gt;3&lt;/td&gt;</span>
 <span class="gp">... </span><span class="s1">    &lt;/tr&gt;</span>
 <span class="gp">... </span><span class="s1">  &lt;/tbody&gt;</span>
 <span class="gp">... </span><span class="s1">&lt;/table&gt;&#39;&#39;&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="k">assert</span> <span class="n">html_string</span> <span class="o">==</span> <span class="n">df</span><span class="o">.</span><span class="n">to_html</span><span class="p">()</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py class">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.ReadViaPandas">
 <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">ReadViaPandas</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">format</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">include_indexes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">objects_as_strings</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#ReadViaPandas"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.ReadViaPandas" title="Link to this definition"></a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code></a></p>
 <dl class="py method">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.ReadViaPandas.expand">
 <span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#ReadViaPandas.expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.ReadViaPandas.expand" title="Link to this definition"></a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="py class">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.WriteViaPandas">
 <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">WriteViaPandas</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">format</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#WriteViaPandas"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.WriteViaPandas" title="Link to this definition"></a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code></a></p>
 <dl class="py method">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.WriteViaPandas.expand">
 <span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pcoll</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#WriteViaPandas.expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.dataframe.io.WriteViaPandas.expand" title="Link to this definition"></a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_excel">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_excel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_excel" title="Link to this definition"></a></dt>
 <dd><p>Read an Excel file into a <code class="docutils literal notranslate"><span class="pre">pandas</span></code> <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code>.</p>
 <p>Supports <cite>xls</cite>, <cite>xlsx</cite>, <cite>xlsm</cite>, <cite>xlsb</cite>, <cite>odf</cite>, <cite>ods</cite> and <cite>odt</cite> file extensions
 read from a local filesystem or URL. Supports an option to read
 a single sheet or a list of sheets.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>io</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#bytes" title="(in Python v3.13)"><em>bytes</em></a><em>, </em><em>ExcelFile</em><em>, </em><em>xlrd.Book</em><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.xlsx</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.1.0: </span>Passing byte strings is deprecated. To read from a
 byte string, wrap it in a <code class="docutils literal notranslate"><span class="pre">BytesIO</span></code> object.</p>
 </div>
 </p></li>
 <li><p><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, or </em><em>None</em><em>, </em><em>default 0</em>) – <p>Strings are used for sheet names. Integers are used in zero-indexed
 sheet positions (chart sheets do not count as a sheet position).
 Lists of strings/integers are used to request multiple sheets.
 Specify <code class="docutils literal notranslate"><span class="pre">None</span></code> to get all worksheets.</p>
 <p>Available cases:</p>
 <ul>
 <li><p>Defaults to <code class="docutils literal notranslate"><span class="pre">0</span></code>: 1st sheet as a <cite>DeferredDataFrame</cite></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">1</span></code>: 2nd sheet as a <cite>DeferredDataFrame</cite></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;Sheet1&quot;</span></code>: Load sheet with name “Sheet1”</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1,</span> <span class="pre">&quot;Sheet5&quot;]</span></code>: Load the first sheet, the second sheet, and the sheet named “Sheet5”
 as a dict of <cite>DeferredDataFrame</cite></p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">None</span></code>: All worksheets.</p></li>
 </ul>
 </p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 0</em>) – Row (0-indexed) to use for the column labels of the parsed
 DeferredDataFrame. If a list of integers is passed those row positions will
 be combined into a <code class="docutils literal notranslate"><span class="pre">MultiIndex</span></code>. Use None if there is no header.</p></li>
 <li><p><strong>names</strong> (<em>array-like</em><em>, </em><em>default None</em>) – List of column names to use. If file contains no header row,
 then you should explicitly pass header=None.</p></li>
 <li><p><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default None</em>) – <p>Column (0-indexed) to use as the row labels of the DeferredDataFrame.
 Pass None if there is no such column.  If a list is passed,
 those columns will be combined into a <code class="docutils literal notranslate"><span class="pre">MultiIndex</span></code>.  If a
 subset of data is selected with <code class="docutils literal notranslate"><span class="pre">usecols</span></code>, index_col
 is based on the subset.</p>
 <p>Missing values will be forward filled to allow roundtripping with
 <code class="docutils literal notranslate"><span class="pre">to_excel</span></code> for <code class="docutils literal notranslate"><span class="pre">merged_cells=True</span></code>. To avoid forward filling the
 missing values use <code class="docutils literal notranslate"><span class="pre">set_index</span></code> after reading the data instead of
 <code class="docutils literal notranslate"><span class="pre">index_col</span></code>.</p>
 </p></li>
 <li><p><strong>usecols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>list-like</em><em>, or </em><em>callable</em><em>, </em><em>default None</em>) – <ul>
 <li><p>If None, then parse all columns.</p></li>
 <li><p>If str, then indicates comma separated list of Excel column letters
 and column ranges (e.g. “A:E” or “A,C,E:F”). Ranges are inclusive of
 both sides.</p></li>
 <li><p>If list of int, then indicates list of column numbers to be parsed
 (0-indexed).</p></li>
 <li><p>If list of string, then indicates list of column names to be parsed.</p></li>
 <li><p>If callable, then evaluate each column name against it and parse the
 column if the callable returns <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p></li>
 </ul>
 <p>Returns a subset of the columns according to behavior above.</p>
 </p></li>
 <li><p><strong>dtype</strong> (<em>Type name</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>column -&gt; type</em><em>, </em><em>default None</em>) – Data type for data or columns. E.g. {‘a’: np.float64, ‘b’: np.int32}.
 Use <code class="docutils literal notranslate"><span class="pre">object</span></code> to preserve data as stored in Excel and not interpret dtype,
 which will necessarily result in <code class="docutils literal notranslate"><span class="pre">object</span></code> dtype.
 If converters are specified, they will be applied INSTEAD
 of dtype conversion.
 If you use <code class="docutils literal notranslate"><span class="pre">None</span></code>, it will infer the dtype of each column based on the data.</p></li>
 <li><p><strong>engine</strong> (<em>{'openpyxl'</em><em>, </em><em>'calamine'</em><em>, </em><em>'odf'</em><em>, </em><em>'pyxlsb'</em><em>, </em><em>'xlrd'}</em><em>, </em><em>default None</em>) – <p>If io is not a buffer or path, this must be set to identify io.
 Engine compatibility:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">openpyxl</span></code> supports newer Excel file formats.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">calamine</span></code> supports Excel (.xls, .xlsx, .xlsm, .xlsb)
 and OpenDocument (.ods) file formats.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">odf</span></code> supports OpenDocument file formats (.odf, .ods, .odt).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">pyxlsb</span></code> supports Binary Excel files.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">xlrd</span></code> supports old-style Excel files (.xls).</p></li>
 </ul>
 <p>When <code class="docutils literal notranslate"><span class="pre">engine=None</span></code>, the following logic will be used to determine the engine:</p>
 <ul>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is an OpenDocument format (.odf, .ods, .odt),
 then <a class="reference external" href="https://pypi.org/project/odfpy/">odf</a> will be used.</p></li>
 <li><p>Otherwise if <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is an xls format, <code class="docutils literal notranslate"><span class="pre">xlrd</span></code> will be used.</p></li>
 <li><p>Otherwise if <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is in xlsb format, <code class="docutils literal notranslate"><span class="pre">pyxlsb</span></code> will be used.</p></li>
 <li><p>Otherwise <code class="docutils literal notranslate"><span class="pre">openpyxl</span></code> will be used.</p></li>
 </ul>
 </p></li>
 <li><p><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default None</em>) – Dict of functions for converting values in certain columns. Keys can
 either be integers or column labels, values are functions that take one
 input argument, the Excel cell content, and return the transformed
 content.</p></li>
 <li><p><strong>true_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>default None</em>) – Values to consider as True.</p></li>
 <li><p><strong>false_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>default None</em>) – Values to consider as False.</p></li>
 <li><p><strong>skiprows</strong> (<em>list-like</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, or </em><em>callable</em><em>, </em><em>optional</em>) – Line numbers to skip (0-indexed) or number of lines to skip (int) at the
 start of the file. If callable, the callable function will be evaluated
 against the row indices, returning True if the row should be skipped and
 False otherwise. An example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span>
 <span class="pre">x:</span> <span class="pre">x</span> <span class="pre">in</span> <span class="pre">[0,</span> <span class="pre">2]</span></code>.</p></li>
 <li><p><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default None</em>) – Number of rows to parse.</p></li>
 <li><p><strong>na_values</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>list-like</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default None</em>) – Additional strings to recognize as NA/NaN. If dict passed, specific
 per-column NA values. By default the following values are interpreted
 as NaN: ‘’, ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, ‘-NaN’, ‘-nan’,
 ‘1.#IND’, ‘1.#QNAN’, ‘&lt;NA&gt;’, ‘N/A’, ‘NA’, ‘NULL’, ‘NaN’, ‘None’,
 ‘n/a’, ‘nan’, ‘null’.</p></li>
 <li><p><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>Whether or not to include the default NaN values when parsing the data.
 Depending on whether <code class="docutils literal notranslate"><span class="pre">na_values</span></code> is passed in, the behavior is as follows:</p>
 <ul>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is True, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are specified,
 <code class="docutils literal notranslate"><span class="pre">na_values</span></code> is appended to the default NaN values used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is True, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are not specified, only
 the default NaN values are used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is False, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are specified, only
 the NaN values specified in <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are used for parsing.</p></li>
 <li><p>If <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> is False, and <code class="docutils literal notranslate"><span class="pre">na_values</span></code> are not specified, no
 strings will be parsed as NaN.</p></li>
 </ul>
 <p>Note that if <cite>na_filter</cite> is passed in as False, the <code class="docutils literal notranslate"><span class="pre">keep_default_na</span></code> and
 <code class="docutils literal notranslate"><span class="pre">na_values</span></code> parameters will be ignored.</p>
 </p></li>
 <li><p><strong>na_filter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Detect missing value markers (empty strings and the value of na_values). In
 data without any NAs, passing <code class="docutils literal notranslate"><span class="pre">na_filter=False</span></code> can improve the
 performance of reading a large file.</p></li>
 <li><p><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Indicate number of NA values placed in non-numeric columns.</p></li>
 <li><p><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>list-like</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default False</em>) – <p>The behavior is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">bool</span></code>. If True -&gt; try parsing the index.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">list</span></code> of int or names. e.g. If [1, 2, 3] -&gt; try parsing columns 1, 2, 3
 each as a separate date column.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">list</span></code> of lists. e.g.  If [[1, 3]] -&gt; combine columns 1 and 3 and parse as
 a single date column.</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">dict</span></code>, e.g. {‘foo’ : [1, 3]} -&gt; parse columns 1, 3 as date and call
 result ‘foo’.</p></li>
 </ul>
 <p>If a column or index contains an unparsable date, the entire column or
 index will be returned unaltered as an object data type. If you don’t want to
 parse some cells as date just change their type in Excel to “Text”.
 For non-standard datetime parsing, use <code class="docutils literal notranslate"><span class="pre">pd.to_datetime</span></code> after <code class="docutils literal notranslate"><span class="pre">pd.read_excel</span></code>.</p>
 <p>Note: A fast-path exists for iso8601-formatted dates.</p>
 </p></li>
 <li><p><strong>date_parser</strong> (<em>function</em><em>, </em><em>optional</em>) – <p>Function to use for converting a sequence of string columns to an array of
 datetime instances. The default uses <code class="docutils literal notranslate"><span class="pre">dateutil.parser.parser</span></code> to do the
 conversion. Pandas will try to call <cite>date_parser</cite> in three different ways,
 advancing to the next if an exception occurs: 1) Pass one or more arrays
 (as defined by <cite>parse_dates</cite>) as arguments; 2) concatenate (row-wise) the
 string values from the columns defined by <cite>parse_dates</cite> into a single array
 and pass that; and 3) call <cite>date_parser</cite> once for each row using one or
 more strings (corresponding to the columns defined by <cite>parse_dates</cite>) as
 arguments.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.0.0: </span>Use <code class="docutils literal notranslate"><span class="pre">date_format</span></code> instead, or read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply
 <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 </div>
 </p></li>
 <li><p><strong>date_format</strong> (str or dict of column -&gt; format, default <code class="docutils literal notranslate"><span class="pre">None</span></code>) – <p>If used in conjunction with <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>, will parse dates according to this
 format. For anything more complex,
 please read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – Thousands separator for parsing string columns to numeric.  Note that
 this parameter is only necessary for columns stored as TEXT in Excel,
 any numeric columns will automatically be parsed, regardless of display
 format.</p></li>
 <li><p><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default '.'</em>) – <p>Character to recognize as decimal point for parsing string columns to numeric.
 Note that this parameter is only necessary for columns stored as TEXT in Excel,
 any numeric columns will automatically be parsed, regardless of display
 format (e.g. use ‘,’ for European data).</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.4.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>comment</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default None</em>) – Comments out remainder of line. Pass a character or characters to this
 argument to indicate comments in the input file. Any data between the
 comment string and the end of the current line is ignored.</p></li>
 <li><p><strong>skipfooter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 0</em>) – Rows at the end to skip (0-indexed).</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 </p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>engine_kwargs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – Arbitrary keyword arguments passed to excel engine.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p>DeferredDataFrame from the passed in Excel file. See notes in sheet_name
 argument for more information on when a dict of DeferredDataFrames is returned.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)">dict</a> of DeferredDataFrames</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_excel</span></code></dt><dd><p>Write DeferredDataFrame to an Excel file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></dt><dd><p>Write DeferredDataFrame to a comma-separated values (csv) file.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_csv</span></code></a></dt><dd><p>Read a comma-separated values (csv) file into DeferredDataFrame.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_fwf" title="apache_beam.dataframe.io.read_fwf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_fwf</span></code></a></dt><dd><p>Read a table of fixed-width formatted lines into DeferredDataFrame.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>For specific information on the methods used for each Excel engine, refer to the pandas
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-excel-reader" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><span class="xref std std-ref">user guide</span></a>.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">The file can be read using the file name as string or an open file object:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
 <span class="go">       Name  Value</span>
 <span class="go">0   string1      1</span>
 <span class="go">1   string2      2</span>
 <span class="go">2  #Comment      3</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">),</span>
 <span class="gp">... </span>              <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet3&#39;</span><span class="p">)</span>
 <span class="go">   Unnamed: 0      Name  Value</span>
 <span class="go">0           0   string1      1</span>
 <span class="go">1           1   string2      2</span>
 <span class="go">2           2  #Comment      3</span>

 <span class="go">Index and header can be specified via the `index_col` and `header` arguments</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
 <span class="go">     0         1      2</span>
 <span class="go">0  NaN      Name  Value</span>
 <span class="go">1  0.0   string1      1</span>
 <span class="go">2  1.0   string2      2</span>
 <span class="go">3  2.0  #Comment      3</span>

 <span class="go">Column types are inferred but can be explicitly specified</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">dtype</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;Name&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="s1">&#39;Value&#39;</span><span class="p">:</span> <span class="nb">float</span><span class="p">})</span>
 <span class="go">       Name  Value</span>
 <span class="go">0   string1    1.0</span>
 <span class="go">1   string2    2.0</span>
 <span class="go">2  #Comment    3.0</span>

 <span class="go">True, False, and NA values, and thousands separators have defaults,</span>
 <span class="go">but can be explicitly specified, too. Supply the values you would like</span>
 <span class="go">as strings or lists of strings!</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">na_values</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;string1&#39;</span><span class="p">,</span> <span class="s1">&#39;string2&#39;</span><span class="p">])</span>
 <span class="go">       Name  Value</span>
 <span class="go">0       NaN      1</span>
 <span class="go">1       NaN      2</span>
 <span class="go">2  #Comment      3</span>

 <span class="go">Comment lines in the excel input file can be skipped using the</span>
 <span class="go">``comment`` kwarg.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">comment</span><span class="o">=</span><span class="s1">&#39;#&#39;</span><span class="p">)</span>
 <span class="go">      Name  Value</span>
 <span class="go">0  string1    1.0</span>
 <span class="go">1  string2    2.0</span>
 <span class="go">2     None    NaN</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_feather">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_feather</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_feather" title="Link to this definition"></a></dt>
 <dd><p>Load a feather-format object from the file path.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.feather</span></code>.</p></li>
 <li><p><strong>columns</strong> (<em>sequence</em><em>, </em><em>default None</em>) – If not provided, all columns are read.</p></li>
 <li><p><strong>use_threads</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to parallelize reading using multiple threads.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 </p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </p></li>
 </ul>
 </dd>
 <dt class="field-even">Return type<span class="colon">:</span></dt>
 <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#type" title="(in Python v3.13)">type</a> of object stored in file</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_feather</span><span class="p">(</span><span class="s2">&quot;path/to/file.feather&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_parquet">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_parquet" title="Link to this definition"></a></dt>
 <dd><p>Load a parquet object from the file path, returning a DataFrame.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function.
 The string could be a URL. Valid URL schemes include http, ftp, s3,
 gs, and file. For file URLs, a host is expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.parquet</span></code>.
 A file URL can also be a path to a directory that contains multiple
 partitioned parquet files. Both pyarrow and fastparquet support
 paths to directories as well as file URLs. A directory path could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/tables</span></code> or <code class="docutils literal notranslate"><span class="pre">s3://bucket/partition_dir</span></code>.</p></li>
 <li><p><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – <p>Parquet library to use. If ‘auto’, then the option
 <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
 behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
 ‘pyarrow’ is unavailable.</p>
 <p>When using the <code class="docutils literal notranslate"><span class="pre">'pyarrow'</span></code> engine and no storage options are provided
 and a filesystem is implemented by both <code class="docutils literal notranslate"><span class="pre">pyarrow.fs</span></code> and <code class="docutils literal notranslate"><span class="pre">fsspec</span></code>
 (e.g. “s3://”), then the <code class="docutils literal notranslate"><span class="pre">pyarrow.fs</span></code> filesystem is attempted first.
 Use the filesystem keyword with an instantiated fsspec filesystem
 if you wish to use its implementation.</p>
 </p></li>
 <li><p><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>default=None</em>) – If not None, only these columns will be read from the file.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.3.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>use_nullable_dtypes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, use dtypes that use <code class="docutils literal notranslate"><span class="pre">pd.NA</span></code> as missing value indicator
 for the resulting DeferredDataFrame. (only applicable for the <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>
 engine)
 As new dtypes are added that support <code class="docutils literal notranslate"><span class="pre">pd.NA</span></code> in the future, the
 output with this option will change to use those dtypes.
 Note: this is an experimental option, and behaviour (e.g. additional
 supported dtypes) may change without notice.</p>
 <div class="deprecated">
 <p><span class="versionmodified deprecated">Deprecated since version 2.0.</span></p>
 </div>
 </p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </li>
 <li><p><strong>filesystem</strong> (<em>fsspec</em><em> or </em><em>pyarrow filesystem</em><em>, </em><em>default None</em>) – <p>Filesystem object to use when reading the parquet file. Only implemented
 for <code class="docutils literal notranslate"><span class="pre">engine=&quot;pyarrow&quot;</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.1.0.</span></p>
 </div>
 </li>
 <li><p><strong>filters</strong> (<em>List</em><em>[</em><em>Tuple</em><em>] or </em><em>List</em><em>[</em><em>List</em><em>[</em><em>Tuple</em><em>]</em><em>]</em><em>, </em><em>default None</em>) – <p>To filter out data.
 Filter syntax: [[(column, op, val), …],…]
 where op is [==, =, &gt;, &gt;=, &lt;, &lt;=, !=, in, not in]
 The innermost tuples are transposed into a set of filters applied
 through an <cite>AND</cite> operation.
 The outer list combines these sets of filters through an <cite>OR</cite>
 operation.
 A single list of tuples can also be used, meaning that no <cite>OR</cite>
 operation between sets of filters is to be conducted.</p>
 <p>Using this argument will NOT result in row-wise filtering of the final
 partitions unless <code class="docutils literal notranslate"><span class="pre">engine=&quot;pyarrow&quot;</span></code> is also specified.  For
 other engines, filtering is only performed at the partition level, that is,
 to prevent the loading of some row-groups and/or files.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.1.0.</span></p>
 </div>
 </li>
 <li><p><strong>**kwargs</strong> – Any additional kwargs are passed to the engine.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Return type<span class="colon">:</span></dt>
 <dd class="field-even"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_parquet</span></code></dt><dd><p>Create a parquet object that serializes a DeferredDataFrame.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">original_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
 <span class="gp">... </span>    <span class="p">{</span><span class="s2">&quot;foo&quot;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">),</span> <span class="s2">&quot;bar&quot;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">)}</span>
 <span class="gp">... </span>   <span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">original_df</span>
 <span class="go">   foo  bar</span>
 <span class="go">0    0    5</span>
 <span class="go">1    1    6</span>
 <span class="go">2    2    7</span>
 <span class="go">3    3    8</span>
 <span class="go">4    4    9</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df_parquet_bytes</span> <span class="o">=</span> <span class="n">original_df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">io</span><span class="w"> </span><span class="kn">import</span> <span class="n">BytesIO</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="n">BytesIO</span><span class="p">(</span><span class="n">df_parquet_bytes</span><span class="p">))</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_df</span>
 <span class="go">   foo  bar</span>
 <span class="go">0    0    5</span>
 <span class="go">1    1    6</span>
 <span class="go">2    2    7</span>
 <span class="go">3    3    8</span>
 <span class="go">4    4    9</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">original_df</span><span class="p">)</span>
 <span class="go">True</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_bar</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="n">BytesIO</span><span class="p">(</span><span class="n">df_parquet_bytes</span><span class="p">),</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;bar&quot;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_bar</span>
 <span class="go">    bar</span>
 <span class="go">0    5</span>
 <span class="go">1    6</span>
 <span class="go">2    7</span>
 <span class="go">3    8</span>
 <span class="go">4    9</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_bar</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">original_df</span><span class="p">[[</span><span class="s1">&#39;bar&#39;</span><span class="p">]])</span>
 <span class="go">True</span>

 <span class="go">The function uses `kwargs` that are passed directly to the engine.</span>
 <span class="go">In the following example, we use the `filters` argument of the pyarrow</span>
 <span class="go">engine to filter the rows of the DataFrame.</span>

 <span class="go">Since `pyarrow` is the default engine, we can omit the `engine` argument.</span>
 <span class="go">Note that the `filters` argument is implemented by the `pyarrow` engine,</span>
 <span class="go">which can benefit from multithreading and also potentially be more</span>
 <span class="go">economical in terms of memory.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">sel</span> <span class="o">=</span> <span class="p">[(</span><span class="s2">&quot;foo&quot;</span><span class="p">,</span> <span class="s2">&quot;&gt;&quot;</span><span class="p">,</span> <span class="mi">2</span><span class="p">)]</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_part</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="n">BytesIO</span><span class="p">(</span><span class="n">df_parquet_bytes</span><span class="p">),</span> <span class="n">filters</span><span class="o">=</span><span class="n">sel</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">restored_part</span>
 <span class="go">    foo  bar</span>
 <span class="go">0    3    8</span>
 <span class="go">1    4    9</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_sas">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_sas</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_sas" title="Link to this definition"></a></dt>
 <dd><p>Read SAS files stored as either XPORT or SAS7BDAT format files.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.sas7bdat</span></code>.</p></li>
 <li><p><strong>format</strong> (<em>str {'xport'</em><em>, </em><em>'sas7bdat'}</em><em> or </em><em>None</em>) – If None, file format is inferred from file extension. If ‘xport’ or
 ‘sas7bdat’, uses the corresponding format.</p></li>
 <li><p><strong>index</strong> (<em>identifier</em><em> of </em><em>index column</em><em>, </em><em>defaults to None</em>) – Identifier of column that should be used as index of the DeferredDataFrame.</p></li>
 <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default is None</em>) – Encoding for text data.  If None, text data are stored as raw bytes.</p></li>
 <li><p><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – Read file <cite>chunksize</cite> lines at a time, returns iterator.</p></li>
 <li><p><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>defaults to False</em>) – If True, returns an iterator for reading the file incrementally.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the ZIP or TAR archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
 <dd class="field-even"><p><em>DeferredDataFrame if iterator=False and chunksize=None, else SAS7BDATReader
 or XportReader</em></p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_sas</span><span class="p">(</span><span class="s2">&quot;sas_data.sas7bdat&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_spss">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_spss</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_spss" title="Link to this definition"></a></dt>
 <dd><p>Load an SPSS file from the file path, returning a DataFrame.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><em>Path</em>) – File path.</p></li>
 <li><p><strong>usecols</strong> (<em>list-like</em><em>, </em><em>optional</em>) – Return a subset of the columns. If None, return all columns.</p></li>
 <li><p><strong>convert_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default is True</em>) – Convert categorical columns into pd.Categorical.</p></li>
 <li><p><strong>dtype_backend</strong> (<em>{'numpy_nullable'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>default 'numpy_nullable'</em>) – <p>Back-end data type applied to the resultant <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (still experimental). Behaviour is as follows:</p>
 <ul>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;numpy_nullable&quot;</span></code>: returns nullable-dtype-backed <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>
 (default).</p></li>
 <li><p><code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code>: returns pyarrow-backed nullable <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowDtype</span></code>
 DeferredDataFrame.</p></li>
 </ul>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Return type<span class="colon">:</span></dt>
 <dd class="field-even"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_spss</span><span class="p">(</span><span class="s2">&quot;spss_data.sav&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.read_stata">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">read_stata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_stata" title="Link to this definition"></a></dt>
 <dd><p>Read Stata file into DataFrame.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.dta</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </li>
 <li><p><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert date variables to DeferredDataFrame time values.</p></li>
 <li><p><strong>convert_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Read value labels and convert columns to Categorical/Factor variables.</p></li>
 <li><p><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Column to set as index.</p></li>
 <li><p><strong>convert_missing</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Flag indicating whether to convert missing values to their Stata
 representations.  If False, missing values are replaced with nan.
 If True, columns containing missing values are returned with
 object data types and missing values are represented by
 StataMissingValue objects.</p></li>
 <li><p><strong>preserve_dtypes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Preserve Stata datatypes. If False, numeric data are upcast to pandas
 default types for foreign data (float64 or int64).</p></li>
 <li><p><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> or </em><em>None</em>) – Columns to retain.  Columns will be returned in the given order.  None
 returns all columns.</p></li>
 <li><p><strong>order_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Flag indicating whether converted categorical data are ordered.</p></li>
 <li><p><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default None</em>) – Return StataReader object for iterations, returns chunks with
 given number of lines.</p></li>
 <li><p><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default False</em>) – Return StataReader object.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the ZIP or TAR archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 </li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Return type<span class="colon">:</span></dt>
 <dd class="field-even"><p><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or pandas.api.typing.StataReader</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">io.stata.StataReader</span></code></dt><dd><p>Low-level reader for Stata data files.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_stata</span></code></dt><dd><p>Export Stata data files.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Categorical variables read through an iterator may not have the same
 categories and dtype. This occurs when a variable stored in a DTA
 file is associated with an incomplete set of value labels that only
 label a strict subset of the values.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Creating a dummy Stata file for this example</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                    <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>

 <span class="go">Read a Stata dta file:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>

 <span class="go">Read a Stata dta file in 10,000 line chunks:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="p">(</span><span class="mi">20_000</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;uint8&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;i&quot;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;filename.dta&#39;</span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_stata</span><span class="p">(</span><span class="s1">&#39;filename.dta&#39;</span><span class="p">,</span> <span class="n">chunksize</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span> <span class="k">as</span> <span class="n">itr</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">itr</span><span class="p">:</span>
 <span class="gp">... </span>        <span class="c1"># Operate on a single chunk, e.g., chunk.mean()</span>
 <span class="gp">... </span>        <span class="k">pass</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_excel">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_excel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_excel" title="Link to this definition"></a></dt>
 <dd><p>Write object to an Excel sheet.</p>
 <p>To write a single object to an Excel .xlsx file it is only necessary to
 specify a target file name. To write to multiple sheets it is necessary to
 create an <cite>ExcelWriter</cite> object with a target file name, and specify a sheet
 in the file to write to.</p>
 <p>Multiple sheets may be written to by specifying unique <cite>sheet_name</cite>.
 With all data written to the file it is necessary to save the changes.
 Note that creating an <cite>ExcelWriter</cite> object with a file name that already
 exists will result in the contents of the existing file being erased.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>excel_writer</strong> (<em>path-like</em><em>, </em><em>file-like</em><em>, or </em><em>ExcelWriter object</em>) – File path or existing ExcelWriter.</p></li>
 <li><p><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default 'Sheet1'</em>) – Name of sheet which will contain DeferredDataFrame.</p></li>
 <li><p><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</p></li>
 <li><p><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Format string for floating point numbers. For example
 <code class="docutils literal notranslate"><span class="pre">float_format=&quot;%.2f&quot;</span></code> will format 0.1234 to 0.12.</p></li>
 <li><p><strong>columns</strong> (<em>sequence</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Columns to write.</p></li>
 <li><p><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given, it is
 assumed to be aliases for the column names.</p></li>
 <li><p><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</p></li>
 <li><p><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><em>sequence</em><em>, </em><em>optional</em>) – Column label for index column(s) if desired. If not specified, and
 <cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
 sequence should be given if the DeferredDataFrame uses MultiIndex.</p></li>
 <li><p><strong>startrow</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell row to dump data frame.</p></li>
 <li><p><strong>startcol</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell column to dump data frame.</p></li>
 <li><p><strong>engine</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – Write engine to use, ‘openpyxl’ or ‘xlsxwriter’. You can also set this
 via the options <code class="docutils literal notranslate"><span class="pre">io.excel.xlsx.writer</span></code> or
 <code class="docutils literal notranslate"><span class="pre">io.excel.xlsm.writer</span></code>.</p></li>
 <li><p><strong>merge_cells</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default True</em>) – Write MultiIndex and Hierarchical Rows as merged cells.</p></li>
 <li><p><strong>inf_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>default 'inf'</em>) – Representation for infinity (there is no native representation for
 infinity in Excel).</p></li>
 <li><p><strong>freeze_panes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.13)"><em>tuple</em></a><em> of </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em> (</em><em>length 2</em><em>)</em><em>, </em><em>optional</em>) – Specifies the one-based bottommost row and rightmost column that
 is to be frozen.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) –</p><p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">here</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.2.0.</span></p>
 </div>
 </li>
 <li><p><strong>engine_kwargs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – Arbitrary keyword arguments passed to excel engine.</p></li>
 </ul>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.to_csv" title="apache_beam.dataframe.io.to_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_csv</span></code></a></dt><dd><p>Write DeferredDataFrame to a comma-separated values (csv) file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">ExcelWriter</span></code></dt><dd><p>Class for writing DeferredDataFrame objects into excel sheets.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_excel" title="apache_beam.dataframe.io.read_excel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_excel</span></code></a></dt><dd><p>Read an Excel file into a DeferredDataFrame.</p>
 </dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_csv</span></code></a></dt><dd><p>Read a comma-separated values (csv) file into DeferredDataFrame.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">io.formats.style.Styler.to_excel</span></code></dt><dd><p>Add styles to Excel sheet.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>For compatibility with <code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code>,
 to_excel serializes lists and dicts to strings before writing.</p>
 <p>Once a workbook has been saved it is not possible to write further
 data without rewriting the whole workbook.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create, write to and save a workbook:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
 <span class="gp">... </span>                   <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">)</span>

 <span class="go">To specify the sheet name:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">,</span>
 <span class="gp">... </span>             <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>

 <span class="go">If you wish to write to more than one sheet in the workbook, it is</span>
 <span class="go">necessary to specify an ExcelWriter object:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">df1</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
 <span class="gp">... </span>    <span class="n">df2</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_2&#39;</span><span class="p">)</span>

 <span class="go">ExcelWriter can also be used to append to an existing Excel file:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                    <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_3&#39;</span><span class="p">)</span>

 <span class="go">To set the library that is used to write the Excel file,</span>
 <span class="go">you can pass the `engine` keyword (the default engine is</span>
 <span class="go">automatically chosen depending on the file extension):</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s1">&#39;output1.xlsx&#39;</span><span class="p">,</span> <span class="n">engine</span><span class="o">=</span><span class="s1">&#39;xlsxwriter&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_feather">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_feather</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_feather" title="Link to this definition"></a></dt>
 <dd><p>Write a DataFrame to the binary Feather format.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function. If a string or a path,
 it will be used as Root Directory path when writing a partitioned dataset.</p></li>
 <li><p><strong>**kwargs</strong> – Additional keywords passed to <code class="xref py py-func docutils literal notranslate"><span class="pre">pyarrow.feather.write_feather()</span></code>.
 This includes the <cite>compression</cite>, <cite>compression_level</cite>, <cite>chunksize</cite>
 and <cite>version</cite> keywords.</p></li>
 </ul>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <p class="rubric">Notes</p>
 <p>This function writes the dataframe as a <a class="reference external" href="https://arrow.apache.org/docs/python/feather.html">feather file</a>. Requires a default
 index. For saving the DeferredDataFrame with your custom index use a method that
 supports custom indices e.g. <cite>to_parquet</cite>.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_feather</span><span class="p">(</span><span class="s2">&quot;file.feather&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_parquet">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_parquet" title="Link to this definition"></a></dt>
 <dd><p>Write a DataFrame to the binary parquet format.</p>
 <p>This function writes the dataframe as a <a class="reference external" href="https://parquet.apache.org/">parquet file</a>. You can choose different parquet
 backends, and have the option of compression. See
 <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><span class="xref std std-ref">the user guide</span></a> for more details.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><em>None</em><em>, </em><em>default None</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function. If None, the result is
 returned as bytes. If a string or path, it will be used as Root Directory
 path when writing a partitioned dataset.</p></li>
 <li><p><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – Parquet library to use. If ‘auto’, then the option
 <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
 behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
 ‘pyarrow’ is unavailable.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><em>None</em><em>, </em><em>default 'snappy'</em>) – Name of the compression to use. Use <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Supported options: ‘snappy’, ‘gzip’, ‘brotli’, ‘lz4’, ‘zstd’.</p></li>
 <li><p><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><em>default None</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, include the dataframe’s index(es) in the file output.
 If <code class="docutils literal notranslate"><span class="pre">False</span></code>, they will not be written to the file.
 If <code class="docutils literal notranslate"><span class="pre">None</span></code>, similar to <code class="docutils literal notranslate"><span class="pre">True</span></code> the dataframe’s index(es)
 will be saved. However, instead of being saved as values,
 the RangeIndex will be stored as a range in the metadata so it
 doesn’t require much space and is faster. Other indexes will
 be included as columns in the file output.</p></li>
 <li><p><strong>partition_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Column names by which to partition the dataset.
 Columns are partitioned in the order they are given.
 Must be None if path is not a string.</p></li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) –</p><p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">here</a>.</p>
 </li>
 <li><p><strong>**kwargs</strong> – Additional arguments passed to the parquet library. See
 <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v3.0.0.dev0+2296.gfcd2a5d50f)"><span class="xref std std-ref">pandas io</span></a> for more details.</p></li>
 </ul>
 </dd>
 <dt class="field-even">Return type<span class="colon">:</span></dt>
 <dd class="field-even"><p>bytes if no path argument is provided else None</p>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_parquet" title="apache_beam.dataframe.io.read_parquet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_parquet</span></code></a></dt><dd><p>Read a parquet file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_orc</span></code></dt><dd><p>Write an orc file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></dt><dd><p>Write a csv file.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_sql</span></code></dt><dd><p>Write to a sql table.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.to_hdf</span></code></dt><dd><p>Write to hdf.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>This function requires either the <a class="reference external" href="https://pypi.org/project/fastparquet">fastparquet</a> or <a class="reference external" href="https://arrow.apache.org/docs/python/">pyarrow</a> library.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;gzip&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">)</span>
 <span class="go">   col1  col2</span>
 <span class="go">0     1     3</span>
 <span class="go">1     2     4</span>

 <span class="go">If you want to get a buffer to the parquet content you can use an io.BytesIO</span>
 <span class="go">object, as long as you don&#39;t use partition_cols, which creates multiple files.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span><span class="w"> </span><span class="nn">io</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">BytesIO</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
 <span class="go">0</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="py function">
 <dt class="sig sig-object py" id="apache_beam.dataframe.io.to_stata">
 <span class="sig-prename descclassname"><span class="pre">apache_beam.dataframe.io.</span></span><span class="sig-name descname"><span class="pre">to_stata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_stata" title="Link to this definition"></a></dt>
 <dd><p>Export DataFrame object to Stata dta format.</p>
 <p>Writes the DataFrame to a Stata dataset file.
 “dta” files contain a Stata dataset.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>buffer</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function.</p></li>
 <li><p><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a>) – Dictionary mapping columns containing datetime types to stata
 internal format to use when writing the dates. Options are ‘tc’,
 ‘td’, ‘tm’, ‘tw’, ‘th’, ‘tq’, ‘ty’. Column can be either an integer
 or a name. Datetime columns that do not have a conversion type
 specified will be converted to ‘tc’. Raises NotImplementedError if
 a datetime column has timezone information.</p></li>
 <li><p><strong>write_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Write the index to Stata dataset.</p></li>
 <li><p><strong>byteorder</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – Can be “&gt;”, “&lt;”, “little”, or “big”. Default is <cite>sys.byteorder</cite>.</p></li>
 <li><p><strong>time_stamp</strong> (<em>datetime</em>) – A datetime to use as file creation date.  Default is the current
 time.</p></li>
 <li><p><strong>data_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>optional</em>) – A label for the data set.  Must be 80 characters or smaller.</p></li>
 <li><p><strong>variable_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a>) – Dictionary containing columns as keys and variable labels as
 values. Each label must be 80 characters or fewer.</p></li>
 <li><p><strong>version</strong> (<em>{114</em><em>, </em><em>117</em><em>, </em><em>118</em><em>, </em><em>119</em><em>, </em><em>None}</em><em>, </em><em>default 114</em>) – Version to use in the output dta file. Set to None to let pandas
 decide between 118 or 119 formats depending on the number of
 columns in the frame. Version 114 can be read by Stata 10 and
 later. Version 117 can be read by Stata 13 or later. Version 118
 is supported in Stata 14 and later. Version 119 is supported in
 Stata 15 and later. Version 114 limits string variables to 244
 characters or fewer while versions 117 and later allow strings
 with lengths up to 2,000,000 characters. Versions 118 and 119
 support Unicode characters, and version 119 supports more than
 32,767 variables.</p>
 <p>Version 119 should usually only be used when the number of
 variables exceeds the capacity of dta format 118. Exporting
 smaller datasets in format 119 may have unintended consequences,
 and, as of November 2020, Stata SE cannot read version 119 files.</p>
 </li>
 <li><p><strong>convert_strl</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><em>list</em></a><em>, </em><em>optional</em>) – List of names of string columns to convert to Stata StrL
 format. Only available if version is 117.  Storing strings in the
 StrL format can produce smaller dta files if strings have more than
 8 characters and values are repeated.</p></li>
 <li><p><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – For on-the-fly compression of the output data. If ‘infer’ and ‘path’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'xz'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and
 other key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code>, <code class="docutils literal notranslate"><span class="pre">lzma.LZMAFile</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified changed">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </li>
 <li><p><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>optional</em>) – Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 </li>
 <li><p><strong>value_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em> of </em><em>dicts</em>) – Dictionary containing columns as keys and dictionaries of column value
 to labels as values. Labels for a single variable must be 32,000
 characters or fewer.</p>
 <div class="versionadded">
 <p><span class="versionmodified added">Added in version 1.4.0.</span></p>
 </div>
 </li>
 </ul>
 </dd>
 <dt class="field-even">Raises<span class="colon">:</span></dt>
 <dd class="field-even"><ul class="simple">
 <li><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.13)"><strong>NotImplementedError</strong></a> –</p>
 <ul>
 <li><p>If datetimes contain timezone information</p></li>
 <li><p>Column dtype is not representable in Stata</p></li>
 </ul>
 </li>
 <li><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.13)"><strong>ValueError</strong></a> –</p>
 <ul>
 <li><p>Columns listed in convert_dates are neither datetime64[ns]
 nor datetime.datetime</p></li>
 <li><p>Column listed in convert_dates is not in DeferredDataFrame</p></li>
 <li><p>Categorical label contains more than 32,000 characters</p></li>
 </ul>
 </li>
 </ul>
 </dd>
 </dl>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <dl class="simple">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_stata" title="apache_beam.dataframe.io.read_stata"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read_stata</span></code></a></dt><dd><p>Import Stata data files.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">io.stata.StataWriter</span></code></dt><dd><p>Low-level writer for Stata data files.</p>
 </dd>
 <dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">io.stata.StataWriter117</span></code></dt><dd><p>Low-level writer for version 117 files.</p>
 </dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                              <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 </section>


            </div>
           </div>
           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
         <a href="apache_beam.dataframe.frames.html" class="btn btn-neutral float-left" title="apache_beam.dataframe.frames module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
         <a href="apache_beam.dataframe.pandas_top_level_functions.html" class="btn btn-neutral float-right" title="apache_beam.dataframe.pandas_top_level_functions module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
     </div>

   <hr/>

   <div role="contentinfo">
     <p>&#169; Copyright Apache Beam.</p>
   </div>

   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
     provided by <a href="https://readthedocs.org">Read the Docs</a>.


 </footer>
         </div>
       </div>
     </section>
   </div>
   <script>
       jQuery(function () {
           SphinxRtdTheme.Navigation.enable(true);
       });
   </script>

 </body>
 </html>