pydoc/2.56.0/apache_beam.dataframe.io.html - beam-site - Git at Google



 <!DOCTYPE html>
 <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
 <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
 <head>
   <meta charset="utf-8">

   <meta name="viewport" content="width=device-width, initial-scale=1.0">

   <title>apache_beam.dataframe.io module &mdash; Apache Beam 2.56.0 documentation</title>


   <script type="text/javascript" src="_static/js/modernizr.min.js"></script>


       <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
         <script type="text/javascript" src="_static/jquery.js"></script>
         <script type="text/javascript" src="_static/underscore.js"></script>
         <script type="text/javascript" src="_static/doctools.js"></script>
         <script type="text/javascript" src="_static/language_data.js"></script>
         <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>

     <script type="text/javascript" src="_static/js/theme.js"></script>


   <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
   <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
     <link rel="index" title="Index" href="genindex.html" />
     <link rel="search" title="Search" href="search.html" />
     <link rel="next" title="apache_beam.dataframe.pandas_top_level_functions module" href="apache_beam.dataframe.pandas_top_level_functions.html" />
     <link rel="prev" title="apache_beam.dataframe.frames module" href="apache_beam.dataframe.frames.html" />
 </head>

 <body class="wy-body-for-nav">


   <div class="wy-grid-for-nav">

     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
       <div class="wy-side-scroll">
         <div class="wy-side-nav-search" >


             <a href="index.html" class="icon icon-home"> Apache Beam


           </a>


               <div class="version">
                 2.56.0
               </div>


 <div role="search">
   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
     <input type="text" name="q" placeholder="Search docs" />
     <input type="hidden" name="check_keywords" value="yes" />
     <input type="hidden" name="area" value="default" />
   </form>
 </div>


         </div>

         <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">


               <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
 <li class="toctree-l1 current"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a><ul class="current">
 <li class="toctree-l2 current"><a class="reference internal" href="apache_beam.dataframe.html#submodules">Submodules</a><ul class="current">
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.convert.html">apache_beam.dataframe.convert module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.doctests.html">apache_beam.dataframe.doctests module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.expressions.html">apache_beam.dataframe.expressions module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.frame_base.html">apache_beam.dataframe.frame_base module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.frames.html">apache_beam.dataframe.frames module</a></li>
 <li class="toctree-l3 current"><a class="current reference internal" href="#">apache_beam.dataframe.io module</a><ul>
 <li class="toctree-l4"><a class="reference internal" href="#sources">Sources</a></li>
 <li class="toctree-l4"><a class="reference internal" href="#sinks">Sinks</a></li>
 </ul>
 </li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.pandas_top_level_functions.html">apache_beam.dataframe.pandas_top_level_functions module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.partitionings.html">apache_beam.dataframe.partitionings module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.schemas.html">apache_beam.dataframe.schemas module</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.transforms.html">apache_beam.dataframe.transforms module</a></li>
 </ul>
 </li>
 </ul>
 </li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.yaml.html">apache_beam.yaml package</a></li>
 </ul>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
 </ul>


         </div>
       </div>
     </nav>

     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">


       <nav class="wy-nav-top" aria-label="top navigation">

           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
           <a href="index.html">Apache Beam</a>

       </nav>


       <div class="wy-nav-content">

         <div class="rst-content">


 <div role="navigation" aria-label="breadcrumbs navigation">

   <ul class="wy-breadcrumbs">

       <li><a href="index.html">Docs</a> &raquo;</li>

           <li><a href="apache_beam.dataframe.html">apache_beam.dataframe package</a> &raquo;</li>

       <li>apache_beam.dataframe.io module</li>


       <li class="wy-breadcrumbs-aside">


             <a href="_sources/apache_beam.dataframe.io.rst.txt" rel="nofollow"> View page source</a>


       </li>

   </ul>


   <hr/>
 </div>
           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
            <div itemprop="articleBody">

   <div class="section" id="module-apache_beam.dataframe.io">
 <span id="apache-beam-dataframe-io-module"></span><h1>apache_beam.dataframe.io module<a class="headerlink" href="#module-apache_beam.dataframe.io" title="Permalink to this headline">¶</a></h1>
 <p>Sources and sinks for the Beam DataFrame API.</p>
 <div class="section" id="sources">
 <h2>Sources<a class="headerlink" href="#sources" title="Permalink to this headline">¶</a></h2>
 <p>This module provides analogs for pandas <code class="docutils literal notranslate"><span class="pre">read</span></code> methods, like
 <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.read_csv.html#pandas.read_csv" title="(in pandas v3.0.0.dev0+802.g7c836ed2ec)"><code class="xref py py-func docutils literal notranslate"><span class="pre">pandas.read_csv()</span></code></a>. However, Beam sources like <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a>
 create a Beam <code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code>, and return a
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> or
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a> representing the contents
 of the referenced file(s) or data source.</p>
 <p>The result of these methods must be applied to a <code class="xref py py-class docutils literal notranslate"><span class="pre">Pipeline</span></code>
 object, for example:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">dataframe</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
 </pre></div>
 </div>
 </div>
 <div class="section" id="sinks">
 <h2>Sinks<a class="headerlink" href="#sinks" title="Permalink to this headline">¶</a></h2>
 <p>This module also defines analogs for pandas sink, or <code class="docutils literal notranslate"><span class="pre">to</span></code>, methods that
 generate a Beam <code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code>. Users should prefer calling
 these operations from <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a>
 instances (for example with
 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv</span></code></a>).</p>
 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_gbq">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_gbq</code><span class="sig-paren">(</span><em>table</em>, <em>dataset=None</em>, <em>project_id=None</em>, <em>use_bqstorage_api=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_gbq"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_gbq" title="Permalink to this definition">¶</a></dt>
 <dd><p>This function reads data from a BigQuery table and produces a
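 <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a>.</p>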
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – Please specify a table. This can be done in the format
 ‘PROJECT:dataset.table’ if one would not wish to utilize
 the parameters below.</li>
 <li><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – Please specify the dataset
 (can omit if table was specified as ‘PROJECT:dataset.table’).</li>
 <li><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – Please specify the project ID
 (can omit if table was specified as ‘PROJECT:dataset.table’).</li>
 <li><strong>use_bqstorage_api</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a>) – If you would like to utilize
 the BigQuery Storage API in ReadFromBigQuery, please set
 this flag to true. Otherwise, please set flag
 to false or leave it unspecified.</li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_csv">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_csv</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>splittable=False</em>, <em>binary=True</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_csv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_csv" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read a comma-separated values (csv) file into DataFrame.</p>
 <p>Also supports optionally iterating over the file or breaking it
 into chunks.</p>
 <p>Additional help can be found in the online docs for
 <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html">IO Tools</a>.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
 expected. A local file could be: <a class="reference external" href="file://localhost/path/to/table.csv">file://localhost/path/to/table.csv</a>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method, such as
 a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function) or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </li>
 <li><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – Delimiter to use. If sep is None, the C engine cannot automatically detect
 the separator, but the Python parsing engine can, meaning the latter will
 be used and automatically detect the separator by Python’s builtin sniffer
 tool, <code class="docutils literal notranslate"><span class="pre">csv.Sniffer</span></code>. In addition, separators longer than 1 character and
 different from <code class="docutils literal notranslate"><span class="pre">'\s+'</span></code> will be interpreted as regular expressions and
 will also force the use of the Python parsing engine. Note that regex
 delimiters are prone to ignoring quoted data. Regex example: <code class="docutils literal notranslate"><span class="pre">'\r\t'</span></code>.</li>
 <li><strong>delimiter</strong> (str, default <code class="docutils literal notranslate"><span class="pre">None</span></code>) – Alias for sep.</li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>list of int</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a><em>, </em><em>default 'infer'</em>) – Row number(s) to use as the column names, and the start of the
 data.  Default behavior is to infer the column names: if no names
 are passed the behavior is identical to <code class="docutils literal notranslate"><span class="pre">header=0</span></code> and column
 names are inferred from the first line of the file, if column
 names are passed explicitly then the behavior is identical to
 <code class="docutils literal notranslate"><span class="pre">header=None</span></code>. Explicitly pass <code class="docutils literal notranslate"><span class="pre">header=0</span></code> to be able to
 replace existing names. The header can be a list of integers that
 specify row locations for a multi-index on the columns
 e.g. [0,1,3]. Intervening rows that are not specified will be
 skipped (e.g. 2 in this example is skipped). Note that this
 parameter ignores commented lines and empty lines if
 <code class="docutils literal notranslate"><span class="pre">skip_blank_lines=True</span></code>, so <code class="docutils literal notranslate"><span class="pre">header=0</span></code> denotes the first line of
 data rather than the first line of the file.</li>
 <li><strong>names</strong> (<em>array-like</em><em>, </em><em>optional</em>) – List of column names to use. If the file contains a header row,
 then you should explicitly pass <code class="docutils literal notranslate"><span class="pre">header=0</span></code> to override the column names.
 Duplicates in this list are not allowed.</li>
 <li><strong>index_col</strong> (int, str, sequence of int / str, or False, optional, default <code class="docutils literal notranslate"><span class="pre">None</span></code>) – <p>Column(s) to use as the row labels of the <code class="docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>, either given as
 string name or column index. If a sequence of int / str is given, a
 MultiIndex is used.</p>
 <p>Note: <code class="docutils literal notranslate"><span class="pre">index_col=False</span></code> can be used to force pandas to <em>not</em> use the first
 column as the index, e.g. when you have a malformed file with delimiters at
 the end of each line.</p>
 </li>
 <li><strong>usecols</strong> (<em>list-like</em><em> or </em><em>callable</em><em>, </em><em>optional</em>) – <p>Return a subset of the columns. If list-like, all elements must either
 be positional (i.e. integer indices into the document columns) or strings
 that correspond to column names provided either by the user in <cite>names</cite> or
 inferred from the document header row(s). If <code class="docutils literal notranslate"><span class="pre">names</span></code> are given, the document
 header row(s) are not taken into account. For example, a valid list-like
 <cite>usecols</cite> parameter would be <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1,</span> <span class="pre">2]</span></code> or <code class="docutils literal notranslate"><span class="pre">['foo',</span> <span class="pre">'bar',</span> <span class="pre">'baz']</span></code>.
 Element order is ignored, so <code class="docutils literal notranslate"><span class="pre">usecols=[0,</span> <span class="pre">1]</span></code> is the same as <code class="docutils literal notranslate"><span class="pre">[1,</span> <span class="pre">0]</span></code>.
 To instantiate a DeferredDataFrame from <code class="docutils literal notranslate"><span class="pre">data</span></code> with element order preserved use
 <code class="docutils literal notranslate"><span class="pre">pd.read_csv(data,</span> <span class="pre">usecols=['foo',</span> <span class="pre">'bar'])[['foo',</span> <span class="pre">'bar']]</span></code> for columns
 in <code class="docutils literal notranslate"><span class="pre">['foo',</span> <span class="pre">'bar']</span></code> order or
 <code class="docutils literal notranslate"><span class="pre">pd.read_csv(data,</span> <span class="pre">usecols=['foo',</span> <span class="pre">'bar'])[['bar',</span> <span class="pre">'foo']]</span></code>
 for <code class="docutils literal notranslate"><span class="pre">['bar',</span> <span class="pre">'foo']</span></code> order.</p>
 <p>If callable, the callable function will be evaluated against the column
 names, returning names where the callable function evaluates to True. An
 example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span> <span class="pre">x:</span> <span class="pre">x.upper()</span> <span class="pre">in</span>
 <span class="pre">['AAA',</span> <span class="pre">'BBB',</span> <span class="pre">'DDD']</span></code>. Using this parameter results in much faster
 parsing time and lower memory usage.</p>
 </li>
 <li><strong>dtype</strong> (<em>Type name</em><em> or </em><em>dict of column -&gt; type</em><em>, </em><em>optional</em>) – <p>Data type for data or columns. E.g. {‘a’: np.float64, ‘b’: np.int32,
 ‘c’: ‘Int64’}
 Use <cite>str</cite> or <cite>object</cite> together with suitable <cite>na_values</cite> settings
 to preserve and not interpret dtype.
 If converters are specified, they will be applied INSTEAD
 of dtype conversion.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Support for defaultdict was added. Specify a defaultdict as input where
 the default determines the dtype of the columns which are not explicitly
 listed.</p>
 </div>
 </li>
 <li><strong>engine</strong> (<em>{'c'</em><em>, </em><em>'python'</em><em>, </em><em>'pyarrow'}</em><em>, </em><em>optional</em>) – <p>Parser engine to use. The C and pyarrow engines are faster, while the python engine
 is currently more feature-complete. Multithreading is currently only supported by
 the pyarrow engine.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.4.0: </span>The “pyarrow” engine was added as an <em>experimental</em> engine, and some features
 are unsupported, or may not work correctly, with this engine.</p>
 </div>
 </li>
 <li><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – Dict of functions for converting values in certain columns. Keys can either
 be integers or column labels.</li>
 <li><strong>true_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>optional</em>) – Values to consider as True in addition to case-insensitive variants of “True”.</li>
 <li><strong>false_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>optional</em>) – Values to consider as False in addition to case-insensitive variants of “False”.</li>
 <li><strong>skipinitialspace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Skip spaces after delimiter.</li>
 <li><strong>skiprows</strong> (<em>list-like</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em> or </em><em>callable</em><em>, </em><em>optional</em>) – <p>Line numbers to skip (0-indexed) or number of lines to skip (int)
 at the start of the file.</p>
 <p>If callable, the callable function will be evaluated against the row
 indices, returning True if the row should be skipped and False otherwise.
 An example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span> <span class="pre">x:</span> <span class="pre">x</span> <span class="pre">in</span> <span class="pre">[0,</span> <span class="pre">2]</span></code>.</p>
 </li>
 <li><strong>skipfooter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 0</em>) – Number of lines at bottom of file to skip (Unsupported with engine=’c’).</li>
 <li><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – Number of rows of file to read. Useful for reading pieces of large files.</li>
 <li><strong>na_values</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>list-like</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – Additional strings to recognize as NA/NaN. If dict passed, specific
 per-column NA values.  By default the following values are interpreted as
 NaN: ‘’, ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, ‘-NaN’, ‘-nan’,
 ‘1.#IND’, ‘1.#QNAN’, ‘&lt;NA&gt;’, ‘N/A’, ‘NA’, ‘NULL’, ‘NaN’, ‘None’,
 ‘n/a’, ‘nan’, ‘null’.</li>
 <li><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>Whether or not to include the default NaN values when parsing the data.
 Depending on whether <cite>na_values</cite> is passed in, the behavior is as follows:</p>
 <ul>
 <li>If <cite>keep_default_na</cite> is True, and <cite>na_values</cite> are specified, <cite>na_values</cite>
 is appended to the default NaN values used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is True, and <cite>na_values</cite> are not specified, only
 the default NaN values are used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is False, and <cite>na_values</cite> are specified, only
 the NaN values specified in <cite>na_values</cite> are used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is False, and <cite>na_values</cite> are not specified, no
 strings will be parsed as NaN.</li>
 </ul>
 <p>Note that if <cite>na_filter</cite> is passed in as False, the <cite>keep_default_na</cite> and
 <cite>na_values</cite> parameters will be ignored.</p>
 </li>
 <li><strong>na_filter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Detect missing value markers (empty strings and the value of na_values). In
 data without any NAs, passing na_filter=False can improve the performance
 of reading a large file.</li>
 <li><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Indicate number of NA values placed in non-numeric columns.</li>
 <li><strong>skip_blank_lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – If True, skip over blank lines rather than interpreting as NaN values.</li>
 <li><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em> or </em><em>list of int</em><em> or </em><em>names</em><em> or </em><em>list of lists</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default False</em>) – <p>The behavior is as follows:</p>
 <ul>
 <li>boolean. If True -&gt; try parsing the index.</li>
 <li>list of int or names. e.g. If [1, 2, 3] -&gt; try parsing columns 1, 2, 3
 each as a separate date column.</li>
 <li>list of lists. e.g.  If [[1, 3]] -&gt; combine columns 1 and 3 and parse as
 a single date column.</li>
 <li>dict, e.g. {‘foo’ : [1, 3]} -&gt; parse columns 1, 3 as date and call
 result ‘foo’</li>
 </ul>
 <p>If a column or index cannot be represented as an array of datetimes,
 say because of an unparsable value or a mixture of timezones, the column
 or index will be returned unaltered as an object data type. For
 non-standard datetime parsing, use <code class="docutils literal notranslate"><span class="pre">pd.to_datetime</span></code> after
 <code class="docutils literal notranslate"><span class="pre">pd.read_csv</span></code>.</p>
 <p>Note: A fast-path exists for iso8601-formatted dates.</p>
 </li>
 <li><strong>infer_datetime_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True and <cite>parse_dates</cite> is enabled, pandas will attempt to infer the
 format of the datetime strings in the columns, and if it can be inferred,
 switch to a faster method of parsing them. In some cases this can increase
 the parsing speed by 5-10x.</p>
 <div class="deprecated">
 <p><span class="versionmodified">Deprecated since version 2.0.0: </span>A strict version of this argument is now the default, passing it has no effect.</p>
 </div>
 </li>
 <li><strong>keep_date_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – If True and <cite>parse_dates</cite> specifies combining multiple columns then
 keep the original columns.</li>
 <li><strong>date_parser</strong> (<em>function</em><em>, </em><em>optional</em>) – <p>Function to use for converting a sequence of string columns to an array of
 datetime instances. The default uses <code class="docutils literal notranslate"><span class="pre">dateutil.parser.parser</span></code> to do the
 conversion. Pandas will try to call <cite>date_parser</cite> in three different ways,
 advancing to the next if an exception occurs: 1) Pass one or more arrays
 (as defined by <cite>parse_dates</cite>) as arguments; 2) concatenate (row-wise) the
 string values from the columns defined by <cite>parse_dates</cite> into a single array
 and pass that; and 3) call <cite>date_parser</cite> once for each row using one or
 more strings (corresponding to the columns defined by <cite>parse_dates</cite>) as
 arguments.</p>
 <div class="deprecated">
 <p><span class="versionmodified">Deprecated since version 2.0.0: </span>Use <code class="docutils literal notranslate"><span class="pre">date_format</span></code> instead, or read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply
 <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 </div>
 </li>
 <li><strong>date_format</strong> (str or dict of column -&gt; format, default <code class="docutils literal notranslate"><span class="pre">None</span></code>) – <p>If used in conjunction with <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>, will parse dates according to this
 format. For anything more complex,
 please read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.0.</span></p>
 </div>
 </li>
 <li><strong>dayfirst</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – DD/MM format dates, international and European format.</li>
 <li><strong>cache_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – If True, use a cache of unique, converted dates to apply the datetime
 conversion. May produce significant speed-up when parsing duplicate
 date strings, especially ones with timezone offsets.</li>
 <li><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Return TextFileReader object for iteration or getting chunks with
 <code class="docutils literal notranslate"><span class="pre">get_chunk()</span></code>.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span><code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> is a context manager.</p>
 </div>
 </li>
 <li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – <p>Return TextFileReader object for iteration.
 See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking">IO Tools docs</a>
 for more information on <code class="docutils literal notranslate"><span class="pre">iterator</span></code> and <code class="docutils literal notranslate"><span class="pre">chunksize</span></code>.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span><code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> is a context manager.</p>
 </div>
 </li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </li>
 <li><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Thousands separator.</li>
 <li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character to recognize as decimal point (e.g. use ‘,’ for European data).</li>
 <li><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – Character to break file into lines. Only valid with C parser.</li>
 <li><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – The character used to denote the start and end of a quoted item. Quoted
 items can include the delimiter and it will be ignored.</li>
 <li><strong>quoting</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em> or </em><em>csv.QUOTE_* instance</em><em>, </em><em>default 0</em>) – Control field quoting behavior per <code class="docutils literal notranslate"><span class="pre">csv.QUOTE_*</span></code> constants. Use one of
 QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).</li>
 <li><strong>doublequote</strong> (bool, default <code class="docutils literal notranslate"><span class="pre">True</span></code>) – When quotechar is specified and quoting is not <code class="docutils literal notranslate"><span class="pre">QUOTE_NONE</span></code>, indicate
 whether or not to interpret two consecutive quotechar elements INSIDE a
 field as a single <code class="docutils literal notranslate"><span class="pre">quotechar</span></code> element.</li>
 <li><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> (</em><em>length 1</em><em>)</em><em>, </em><em>optional</em>) – One-character string used to escape other characters.</li>
 <li><strong>comment</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Indicates remainder of line should not be parsed. If found at the beginning
 of a line, the line will be ignored altogether. This parameter must be a
 single character. Like empty lines (as long as <code class="docutils literal notranslate"><span class="pre">skip_blank_lines=True</span></code>),
 fully commented lines are ignored by the parameter <cite>header</cite> but not by
 <cite>skiprows</cite>. For example, if <code class="docutils literal notranslate"><span class="pre">comment='#'</span></code>, parsing
 <code class="docutils literal notranslate"><span class="pre">#empty\na,b,c\n1,2,3</span></code> with <code class="docutils literal notranslate"><span class="pre">header=0</span></code> will result in ‘a,b,c’ being
 treated as the header.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default &quot;utf-8&quot;</em>) – <p>Encoding to use for UTF when reading/writing (e.g. ‘utf-8’). <a class="reference external" href="https://docs.python.org/3/library/codecs.html#standard-encodings">List of Python
 standard encodings</a>.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span>When <code class="docutils literal notranslate"><span class="pre">encoding</span></code> is <code class="docutils literal notranslate"><span class="pre">None</span></code>, <code class="docutils literal notranslate"><span class="pre">errors=&quot;replace&quot;</span></code> is passed to
 <code class="docutils literal notranslate"><span class="pre">open()</span></code>. Otherwise, <code class="docutils literal notranslate"><span class="pre">errors=&quot;strict&quot;</span></code> is passed to <code class="docutils literal notranslate"><span class="pre">open()</span></code>.
 This behavior was previously only the case for <code class="docutils literal notranslate"><span class="pre">engine=&quot;python&quot;</span></code>.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.3.0: </span><code class="docutils literal notranslate"><span class="pre">encoding_errors</span></code> is a new argument. <code class="docutils literal notranslate"><span class="pre">encoding</span></code> no longer has an
 influence on how encoding errors are handled.</p>
 </div>
 </li>
 <li><strong>encoding_errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default &quot;strict&quot;</em>) – <p>How encoding errors are treated. <a class="reference external" href="https://docs.python.org/3/library/codecs.html#error-handlers">List of possible values</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.3.0.</span></p>
 </div>
 </li>
 <li><strong>dialect</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/csv.html#csv.Dialect" title="(in Python v3.12)"><em>csv.Dialect</em></a><em>, </em><em>optional</em>) – If provided, this parameter will override values (default or not) for the
 following parameters: <cite>delimiter</cite>, <cite>doublequote</cite>, <cite>escapechar</cite>,
 <cite>skipinitialspace</cite>, <cite>quotechar</cite>, and <cite>quoting</cite>. If it is necessary to
 override values, a ParserWarning will be issued. See csv.Dialect
 documentation for more details.</li>
 <li><strong>on_bad_lines</strong> (<em>{'error'</em><em>, </em><em>'warn'</em><em>, </em><em>'skip'}</em><em> or </em><em>callable</em><em>, </em><em>default 'error'</em>) – <p>Specifies what to do upon encountering a bad line (a line with too many fields).
 Allowed values are:</p>
 <blockquote>
 <div><ul>
 <li>‘error’, raise an Exception when a bad line is encountered.</li>
 <li>‘warn’, raise a warning when a bad line is encountered and skip that line.</li>
 <li>‘skip’, skip bad lines without raising or warning when they are encountered.</li>
 </ul>
 </div></blockquote>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.3.0.</span></p>
 </div>
 <div class="versionadded">
 <ul>
 <li><span class="versionmodified">New in version 1.4.0: </span>callable, function with signature
 <code class="docutils literal notranslate"><span class="pre">(bad_line:</span> <span class="pre">list[str])</span> <span class="pre">-&gt;</span> <span class="pre">list[str]</span> <span class="pre">|</span> <span class="pre">None</span></code> that will process a single
 bad line. <code class="docutils literal notranslate"><span class="pre">bad_line</span></code> is a list of strings split by the <code class="docutils literal notranslate"><span class="pre">sep</span></code>.
 If the function returns <code class="docutils literal notranslate"><span class="pre">None</span></code>, the bad line will be ignored.
 If the function returns a new list of strings with more elements than
 expected, a <code class="docutils literal notranslate"><span class="pre">ParserWarning</span></code> will be emitted while dropping extra elements.
 Only supported when <code class="docutils literal notranslate"><span class="pre">engine=&quot;python&quot;</span></code>.</li>
 </ul>
 </div>
 </li>
 <li><strong>delim_whitespace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Specifies whether or not whitespace (e.g. <code class="docutils literal notranslate"><span class="pre">'</span> <span class="pre">'</span></code> or <code class="docutils literal notranslate"><span class="pre">'</span>&#160;&#160;&#160; <span class="pre">'</span></code>) will be
 used as the sep. Equivalent to setting <code class="docutils literal notranslate"><span class="pre">sep='\s+'</span></code>. If this option
 is set to True, nothing should be passed in for the <code class="docutils literal notranslate"><span class="pre">delimiter</span></code>
 parameter.</li>
 <li><strong>low_memory</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Internally process the file in chunks, resulting in lower memory use
 while parsing, but possibly mixed type inference.  To ensure no mixed
 types either set False, or specify the type with the <cite>dtype</cite> parameter.
 Note that the entire file is read into a single DeferredDataFrame regardless,
 use the <cite>chunksize</cite> or <cite>iterator</cite> parameter to return the data in chunks.
 (Only valid with C parser).</li>
 <li><strong>memory_map</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – If a filepath is provided for <cite>filepath_or_buffer</cite>, map the file object
 directly onto memory and access the data directly from there. Using this
 option can improve performance because there is no longer any I/O overhead.</li>
 <li><strong>float_precision</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – <p>Specifies which converter the C engine should use for floating-point
 values. The options are <code class="docutils literal notranslate"><span class="pre">None</span></code> or ‘high’ for the ordinary converter,
 ‘legacy’ for the original lower precision pandas converter, and
 ‘round_trip’ for the round-trip converter.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.</span></p>
 </div>
 </li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">here</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A comma-separated values (csv) file is returned as two-dimensional
 data structure with labeled axes.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or TextFileReader</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>If your files are large and records do not contain quoted newlines, you may
 pass the extra argument <code class="docutils literal notranslate"><span class="pre">splittable=True</span></code> to enable dynamic splitting for
 this read on newlines. Using this option for records that do contain quoted
 newlines may result in partial records and data corruption.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></dt>
 <dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_fwf" title="apache_beam.dataframe.io.read_fwf"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_fwf()</span></code></a></dt>
 <dd>Read a table of fixed-width formatted lines into DeferredDataFrame.</dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;data.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_csv">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_csv</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>transform_label=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_csv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_csv" title="Permalink to this definition">¶</a></dt>
 <dd><p>Write object to a comma-separated values (csv) file.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a><em>, </em><em>default None</em>) – <p>String, path object (implementing os.PathLike[str]), or file-like
 object implementing a write() function. If None, the result is
 returned as a string. If a non-binary file object is passed, it should
 be opened with <cite>newline=''</cite>, disabling universal newlines. If a binary
 file object is passed, <cite>mode</cite> might need to contain a <cite>‘b’</cite>.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0: </span>Support for binary file objects was introduced.</p>
 </div>
 </li>
 <li><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – String of length 1. Field delimiter for the output file.</li>
 <li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
 <li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>Callable</em><em>, </em><em>default None</em>) – Format string for floating point numbers. If a Callable is given, it takes
 precedence over other numeric formatting parameters, like decimal.</li>
 <li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em>) – Columns to write.</li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
 assumed to be aliases for the column names.</li>
 <li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
 <li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><em>sequence</em><em>, or </em><em>False</em><em>, </em><em>default None</em>) – Column label for index column(s) if desired. If None is given, and
 <cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
 sequence should be given if the object uses MultiIndex. If
 False do not print fields for index names. Use index_label=False
 for easier importing in R.</li>
 <li><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'w'</em>) – Python write mode. The available write modes are the same as
 <a class="reference external" href="https://docs.python.org/3/library/functions.html#open" title="(in Python v3.12)"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a>.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – A string representing the encoding to use in the output file,
 defaults to ‘utf-8’. <cite>encoding</cite> is not supported if <cite>path_or_buf</cite>
 is a non-binary file object.</li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.0.0: </span>May now be a dict with key ‘method’ as compression mode
 and other entries as additional compression options if
 compression mode is ‘zip’.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.1.0: </span>Passing compression options as keys in dict is
 supported for compression modes ‘gzip’, ‘bz2’, ‘zstd’, and ‘zip’.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0: </span>Compression is supported for binary file objects.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0: </span>Previous versions forwarded dict entries for ‘gzip’ to
 <cite>gzip.open</cite> instead of <cite>gzip.GzipFile</cite> which prevented
 setting <cite>mtime</cite>.</p>
 </div>
 </li>
 <li><strong>quoting</strong> (<em>optional constant from csv module</em>) – Defaults to csv.QUOTE_MINIMAL. If you have set a <cite>float_format</cite>
 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
 will treat them as non-numeric.</li>
 <li><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '&quot;'</em>) – String of length 1. Character used to quote fields.</li>
 <li><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – <p>The newline character or character sequence to use in the output
 file. Defaults to <cite>os.linesep</cite>, which depends on the OS in which
 this method is called (e.g. ‘\n’ for Linux, ‘\r\n’ for Windows).</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.5.0: </span>Previously was line_terminator, changed for consistency with
 read_csv and the standard library ‘csv’ module.</p>
 </div>
 </li>
 <li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a>) – Rows to write at a time.</li>
 <li><strong>date_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for datetime objects.</li>
 <li><strong>doublequote</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Control quoting of <cite>quotechar</cite> inside a field.</li>
 <li><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – String of length 1. Character used to escape <cite>sep</cite> and <cite>quotechar</cite>
 when appropriate.</li>
 <li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator. E.g. use ‘,’ for
 European data.</li>
 <li><strong>errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'strict'</em>) – <p>Specifies how encoding and decoding errors are to be handled.
 See the errors argument for <a class="reference external" href="https://docs.python.org/3/library/functions.html#open" title="(in Python v3.12)"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> for a full list
 of options.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.1.0.</span></p>
 </div>
 </li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">here</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting csv format as a
 string. Otherwise returns None.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)">str</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Load a CSV file into a DeferredDataFrame.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.to_excel" title="apache_beam.dataframe.io.to_excel"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_excel()</span></code></a></dt>
 <dd>Write DeferredDataFrame to an Excel file.</dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Raphael&#39;</span><span class="p">,</span> <span class="s1">&#39;Donatello&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;mask&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;red&#39;</span><span class="p">,</span> <span class="s1">&#39;purple&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;weapon&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sai&#39;</span><span class="p">,</span> <span class="s1">&#39;bo staff&#39;</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
 <span class="go">&#39;name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n&#39;</span>

 <span class="go">Create &#39;out.zip&#39; containing &#39;out.csv&#39;</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">compression_opts</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">&#39;zip&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                        <span class="n">archive_name</span><span class="o">=</span><span class="s1">&#39;out.csv&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;out.zip&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
 <span class="gp">... </span>          <span class="n">compression</span><span class="o">=</span><span class="n">compression_opts</span><span class="p">)</span>

 <span class="go">To write a csv file to a new folder or nested folder you will first</span>
 <span class="go">need to create it using either Pathlib or os:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">filepath</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="s1">&#39;folder/subfolder/out.csv&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">filepath</span><span class="o">.</span><span class="n">parent</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">parents</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">filepath</span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">os</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="s1">&#39;folder/subfolder&#39;</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;folder/subfolder/out.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_fwf">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_fwf</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_fwf"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_fwf" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read a table of fixed-width formatted lines into DataFrame.</p>
 <p>Also supports optionally iterating or breaking of the file
 into chunks.</p>
 <p>Additional help can be found in the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html">online docs for IO Tools</a>.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a text <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.csv</span></code>.</li>
 <li><strong>colspecs</strong> (<em>list of tuple</em><em> (</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>) or </em><em>'infer', optional</em>) – A list of tuples giving the extents of the fixed-width
 fields of each line as half-open intervals (i.e.,  [from, to[ ).
 String value ‘infer’ can be used to instruct the parser to try
 detecting the column specifications from the first 100 rows of
 the data which are not being skipped via skiprows (default=’infer’).</li>
 <li><strong>widths</strong> (<em>list of int</em><em>, </em><em>optional</em>) – A list of field widths which can be used instead of ‘colspecs’ if
 the intervals are contiguous.</li>
 <li><strong>infer_nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 100</em>) – The number of rows to consider when letting the parser determine the
 <cite>colspecs</cite>.</li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 <li><strong>**kwds</strong> (<em>optional</em>) – Optional keyword arguments can be passed to <code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code>.</li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A comma-separated values (csv) file is returned as a two-dimensional
 data structure with labeled axes.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or TextFileReader</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></dt>
 <dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_fwf</span><span class="p">(</span><span class="s1">&#39;data.csv&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_json">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_json</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_json"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_json" title="Permalink to this definition">¶</a></dt>
 <dd><p>Convert a JSON string to pandas object.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path_or_buf</strong> (<em>a valid JSON str</em><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.json</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any
 <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </li>
 <li><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – <p>Indication of expected JSON string format.
 Compatible JSON strings can be produced by <code class="docutils literal notranslate"><span class="pre">to_json()</span></code> with a
 corresponding orient value.
 The set of possible orients is:</p>
 <ul>
 <li><code class="docutils literal notranslate"><span class="pre">'split'</span></code> : dict like
 <code class="docutils literal notranslate"><span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">[index],</span> <span class="pre">columns</span> <span class="pre">-&gt;</span> <span class="pre">[columns],</span> <span class="pre">data</span> <span class="pre">-&gt;</span> <span class="pre">[values]}</span></code></li>
 <li><code class="docutils literal notranslate"><span class="pre">'records'</span></code> : list like
 <code class="docutils literal notranslate"><span class="pre">[{column</span> <span class="pre">-&gt;</span> <span class="pre">value},</span> <span class="pre">...</span> <span class="pre">,</span> <span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">value}]</span></code></li>
 <li><code class="docutils literal notranslate"><span class="pre">'index'</span></code> : dict like <code class="docutils literal notranslate"><span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">value}}</span></code></li>
 <li><code class="docutils literal notranslate"><span class="pre">'columns'</span></code> : dict like <code class="docutils literal notranslate"><span class="pre">{column</span> <span class="pre">-&gt;</span> <span class="pre">{index</span> <span class="pre">-&gt;</span> <span class="pre">value}}</span></code></li>
 <li><code class="docutils literal notranslate"><span class="pre">'values'</span></code> : just the values array</li>
 </ul>
 <p>The allowed and default values depend on the value
 of the <cite>typ</cite> parameter.</p>
 <ul>
 <li>when <code class="docutils literal notranslate"><span class="pre">typ</span> <span class="pre">==</span> <span class="pre">'series'</span></code>,<ul>
 <li>allowed orients are <code class="docutils literal notranslate"><span class="pre">{'split','records','index'}</span></code></li>
 <li>default is <code class="docutils literal notranslate"><span class="pre">'index'</span></code></li>
 <li>The DeferredSeries index must be unique for orient <code class="docutils literal notranslate"><span class="pre">'index'</span></code>.</li>
 </ul>
 </li>
 <li>when <code class="docutils literal notranslate"><span class="pre">typ</span> <span class="pre">==</span> <span class="pre">'frame'</span></code>,<ul>
 <li>allowed orients are <code class="docutils literal notranslate"><span class="pre">{'split','records','index',</span>
 <span class="pre">'columns','values',</span> <span class="pre">'table'}</span></code></li>
 <li>default is <code class="docutils literal notranslate"><span class="pre">'columns'</span></code></li>
 <li>The DeferredDataFrame index must be unique for orients <code class="docutils literal notranslate"><span class="pre">'index'</span></code> and
 <code class="docutils literal notranslate"><span class="pre">'columns'</span></code>.</li>
 <li>The DeferredDataFrame columns must be unique for orients <code class="docutils literal notranslate"><span class="pre">'index'</span></code>,
 <code class="docutils literal notranslate"><span class="pre">'columns'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'records'</span></code>.</li>
 </ul>
 </li>
 </ul>
 </li>
 <li><strong>typ</strong> (<em>{'frame'</em><em>, </em><em>'series'}</em><em>, </em><em>default 'frame'</em>) – The type of object to recover.</li>
 <li><strong>dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default None</em>) – <p>If True, infer dtypes; if a dict of column to dtype, then use those;
 if False, then don’t infer dtypes at all, applies only to the data.</p>
 <p>For all <code class="docutils literal notranslate"><span class="pre">orient</span></code> values except <code class="docutils literal notranslate"><span class="pre">'table'</span></code>, default is True.</p>
 </li>
 <li><strong>convert_axes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default None</em>) – <p>Try to convert the axes to the proper dtypes.</p>
 <p>For all <code class="docutils literal notranslate"><span class="pre">orient</span></code> values except <code class="docutils literal notranslate"><span class="pre">'table'</span></code>, default is True.</p>
 </li>
 <li><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – If True then default datelike columns may be converted (depending on
 keep_default_dates).
 If False, no dates will be converted.
 If a list of column names, then those columns will be converted and
 default datelike columns may also be converted (depending on
 keep_default_dates).</li>
 <li><strong>keep_default_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>If parsing dates (convert_dates is not False), then try to parse the
 default datelike columns.
 A column label is datelike if</p>
 <ul>
 <li>it ends with <code class="docutils literal notranslate"><span class="pre">'_at'</span></code>,</li>
 <li>it ends with <code class="docutils literal notranslate"><span class="pre">'_time'</span></code>,</li>
 <li>it begins with <code class="docutils literal notranslate"><span class="pre">'timestamp'</span></code>,</li>
 <li>it is <code class="docutils literal notranslate"><span class="pre">'modified'</span></code>, or</li>
 <li>it is <code class="docutils literal notranslate"><span class="pre">'date'</span></code>.</li>
 </ul>
 </li>
 <li><strong>precise_float</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Set to enable usage of higher precision (strtod) function when
 decoding string to double values. Default (False) is to use fast but
 less precise builtin functionality.</li>
 <li><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – The timestamp unit to detect if converting dates. The default behaviour
 is to try and detect the correct precision, but if this is not desired
 then pass one of ‘s’, ‘ms’, ‘us’ or ‘ns’ to force parsing only seconds,
 milliseconds, microseconds or nanoseconds respectively.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default is 'utf-8'</em>) – The encoding to use to decode py3 bytes.</li>
 <li><strong>encoding_errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default &quot;strict&quot;</em>) – <p>How encoding errors are treated. <a class="reference external" href="https://docs.python.org/3/library/codecs.html#error-handlers">List of possible values</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.3.0.</span></p>
 </div>
 </li>
 <li><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Read the file as a json object per line.</li>
 <li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – <p>Return JsonReader object for iteration.
 See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#line-delimited-json">line-delimited json docs</a>
 for more information on <code class="docutils literal notranslate"><span class="pre">chunksize</span></code>.
 This can only be passed if <cite>lines=True</cite>.
 If this is None, the file will be read into memory all at once.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span><code class="docutils literal notranslate"><span class="pre">JsonReader</span></code> is a context manager.</p>
 </div>
 </li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </li>
 <li><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – <p>The number of lines from the line-delimited JSON file that has to be read.
 This can only be passed if <cite>lines=True</cite>.
 If this is None, all the rows will be returned.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.1.</span></p>
 </div>
 </li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">here</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 <li><strong>engine</strong> (<em>{&quot;ujson&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>default &quot;ujson&quot;</em>) – <p>Parser engine to use. The <code class="docutils literal notranslate"><span class="pre">&quot;pyarrow&quot;</span></code> engine is only available when
 <code class="docutils literal notranslate"><span class="pre">lines=True</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The type returned depends on the value of <cite>typ</cite>.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_json()</span></code></dt>
 <dd>Convert a DeferredDataFrame to a JSON string.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredSeries.to_json()</span></code></dt>
 <dd>Convert a DeferredSeries to a JSON string.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">json_normalize()</span></code></dt>
 <dd>Normalize semi-structured JSON data into a flat table.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Specific to <code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, if a <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> with a literal
 <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name of <cite>index</cite> gets written with <a class="reference internal" href="#apache_beam.dataframe.io.to_json" title="apache_beam.dataframe.io.to_json"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_json()</span></code></a>, the
 subsequent read operation will incorrectly set the <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name to
 <code class="docutils literal notranslate"><span class="pre">None</span></code>. This is because <cite>index</cite> is also used by <code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_json()</span></code>
 to denote a missing <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code> name, and the subsequent
 <a class="reference internal" href="#apache_beam.dataframe.io.read_json" title="apache_beam.dataframe.io.read_json"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_json()</span></code></a> operation cannot distinguish between the two. The same
 limitation is encountered with a <code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code> and any names
 beginning with <code class="docutils literal notranslate"><span class="pre">'level_'</span></code>.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
 <span class="gp">... </span>                  <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;split&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;split&#39;</span><span class="p">)</span>
 <span class="go">    &#39;{&quot;columns&quot;:[&quot;col 1&quot;,&quot;col 2&quot;],&quot;index&quot;:[&quot;row 1&quot;,&quot;row 2&quot;],&quot;data&quot;:[[&quot;a&quot;,&quot;b&quot;],[&quot;c&quot;,&quot;d&quot;]]}&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">_</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;split&#39;</span><span class="p">)</span>
 <span class="go">      col 1 col 2</span>
 <span class="go">row 1     a     b</span>
 <span class="go">row 2     c     d</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;index&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
 <span class="go">&#39;{&quot;row 1&quot;:{&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},&quot;row 2&quot;:{&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}}&#39;</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">_</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
 <span class="go">      col 1 col 2</span>
 <span class="go">row 1     a     b</span>
 <span class="go">row 2     c     d</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;records&#39;`` formatted JSON.</span>
 <span class="go">Note that index labels are not preserved with this encoding.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;records&#39;</span><span class="p">)</span>
 <span class="go">&#39;[{&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},{&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}]&#39;</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_json</span><span class="p">(</span><span class="n">_</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;records&#39;</span><span class="p">)</span>
 <span class="go">  col 1 col 2</span>
 <span class="go">0     a     b</span>
 <span class="go">1     c     d</span>

 <span class="go">Encoding with Table Schema</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s1">&#39;table&#39;</span><span class="p">)</span>
 <span class="go">    &#39;{&quot;schema&quot;:{&quot;fields&quot;:[{&quot;name&quot;:&quot;index&quot;,&quot;type&quot;:&quot;string&quot;},{&quot;name&quot;:&quot;col 1&quot;,&quot;type&quot;:&quot;string&quot;},{&quot;name&quot;:&quot;col 2&quot;,&quot;type&quot;:&quot;string&quot;}],&quot;primaryKey&quot;:[&quot;index&quot;],&quot;pandas_version&quot;:&quot;1.4.0&quot;},&quot;data&quot;:[{&quot;index&quot;:&quot;row 1&quot;,&quot;col 1&quot;:&quot;a&quot;,&quot;col 2&quot;:&quot;b&quot;},{&quot;index&quot;:&quot;row 2&quot;,&quot;col 1&quot;:&quot;c&quot;,&quot;col 2&quot;:&quot;d&quot;}]}&#39;</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_json">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_json</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>orient=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_json"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_json" title="Permalink to this definition">¶</a></dt>
 <dd><p>Convert the object to a JSON string.</p>
 <p>Note that NaN values and None will be converted to null, and datetime objects
 will be converted to UNIX timestamps.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a><em>, </em><em>default None</em>) – String, path object (implementing os.PathLike[str]), or file-like
 object implementing a write() function. If None, the result is
 returned as a string.</li>
 <li><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – <p>Indication of expected JSON string format.</p>
 <ul>
 <li>DeferredSeries:<blockquote>
 <div><ul>
 <li>default is ‘index’</li>
 <li>allowed values are: {‘split’, ‘records’, ‘index’, ‘table’}.</li>
 </ul>
 </div></blockquote>
 </li>
 <li>DeferredDataFrame:<blockquote>
 <div><ul>
 <li>default is ‘columns’</li>
 <li>allowed values are: {‘split’, ‘records’, ‘index’, ‘columns’,
 ‘values’, ‘table’}.</li>
 </ul>
 </div></blockquote>
 </li>
 <li>The format of the JSON string:<blockquote>
 <div><ul>
 <li>‘split’ : dict like {‘index’ -&gt; [index], ‘columns’ -&gt; [columns],
 ‘data’ -&gt; [values]}</li>
 <li>‘records’ : list like [{column -&gt; value}, … , {column -&gt; value}]</li>
 <li>‘index’ : dict like {index -&gt; {column -&gt; value}}</li>
 <li>‘columns’ : dict like {column -&gt; {index -&gt; value}}</li>
 <li>‘values’ : just the values array</li>
 <li>‘table’ : dict like {‘schema’: {schema}, ‘data’: {data}}</li>
 </ul>
 <p>The ‘table’ format describes the data, where the data component is like <code class="docutils literal notranslate"><span class="pre">orient='records'</span></code>.</p>
 </div></blockquote>
 </li>
 </ul>
 </li>
 <li><strong>date_format</strong> (<em>{None</em><em>, </em><em>'epoch'</em><em>, </em><em>'iso'}</em>) – Type of date conversion. ‘epoch’ = epoch milliseconds,
 ‘iso’ = ISO8601. The default depends on the <cite>orient</cite>. For
 <code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, the default is ‘iso’. For all other orients,
 the default is ‘epoch’.</li>
 <li><strong>double_precision</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 10</em>) – The number of decimal places to use when encoding
 floating point values.</li>
 <li><strong>force_ascii</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Force encoded string to be ASCII.</li>
 <li><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'ms'</em><em> (</em><em>milliseconds</em><em>)</em>) – The time unit to encode to, governs timestamp and ISO8601
 precision.  One of ‘s’, ‘ms’, ‘us’, ‘ns’ for second, millisecond,
 microsecond, and nanosecond respectively.</li>
 <li><strong>default_handler</strong> (<em>callable</em><em>, </em><em>default None</em>) – Handler to call if object cannot otherwise be converted to a
 suitable format for JSON. Should receive a single argument which is
 the object to convert and return a serialisable object.</li>
 <li><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – If ‘orient’ is ‘records’, write out line-delimited JSON format. Will
 throw ValueError for any other ‘orient’, since the others are not
 list-like.</li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output data. If ‘infer’ and ‘path_or_buf’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </li>
 <li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to include the index values in the JSON string. Not
 including the index (<code class="docutils literal notranslate"><span class="pre">index=False</span></code>) is only supported when
 orient is ‘split’ or ‘table’.</li>
 <li><strong>indent</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – Length of whitespace used to indent each record.</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas guide on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'w'</em><em> (</em><em>writing</em><em>)</em>) – Specify the IO mode for output when supplying a path_or_buf.
 Accepted args are ‘w’ (writing) and ‘a’ (append) only.
 mode=‘a’ is only supported when lines is True and orient is ‘records’.</li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting json format as a
 string. Otherwise returns None.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)">str</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_json" title="apache_beam.dataframe.io.read_json"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_json()</span></code></a></dt>
 <dd>Convert a JSON string to pandas object.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>The behavior of <code class="docutils literal notranslate"><span class="pre">indent=0</span></code> varies from the stdlib, which does not
 indent the output but does insert newlines. Currently, <code class="docutils literal notranslate"><span class="pre">indent=0</span></code>
 and the default <code class="docutils literal notranslate"><span class="pre">indent=None</span></code> are equivalent in pandas, though this
 may change in a future release.</p>
 <p><code class="docutils literal notranslate"><span class="pre">orient='table'</span></code> contains a ‘pandas_version’ field under ‘schema’.
 This stores the version of <cite>pandas</cite> used in the latest revision of the
 schema.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span><span class="p">,</span> <span class="n">dumps</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
 <span class="gp">... </span>    <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">]],</span>
 <span class="gp">... </span>    <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;row 1&quot;</span><span class="p">,</span> <span class="s2">&quot;row 2&quot;</span><span class="p">],</span>
 <span class="gp">... </span>    <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col 1&quot;</span><span class="p">,</span> <span class="s2">&quot;col 2&quot;</span><span class="p">],</span>
 <span class="gp">... </span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;split&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;columns&quot;: [</span>
 <span class="go">        &quot;col 1&quot;,</span>
 <span class="go">        &quot;col 2&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    &quot;index&quot;: [</span>
 <span class="go">        &quot;row 1&quot;,</span>
 <span class="go">        &quot;row 2&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    &quot;data&quot;: [</span>
 <span class="go">        [</span>
 <span class="go">            &quot;a&quot;,</span>
 <span class="go">            &quot;b&quot;</span>
 <span class="go">        ],</span>
 <span class="go">        [</span>
 <span class="go">            &quot;c&quot;,</span>
 <span class="go">            &quot;d&quot;</span>
 <span class="go">        ]</span>
 <span class="go">    ]</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;records&#39;`` formatted JSON.</span>
 <span class="go">Note that index labels are not preserved with this encoding.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;records&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">[</span>
 <span class="go">    {</span>
 <span class="go">        &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">    },</span>
 <span class="go">    {</span>
 <span class="go">        &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">]</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;index&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;index&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;row 1&quot;: {</span>
 <span class="go">        &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;row 2&quot;: {</span>
 <span class="go">        &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">        &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;columns&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;col 1&quot;: {</span>
 <span class="go">        &quot;row 1&quot;: &quot;a&quot;,</span>
 <span class="go">        &quot;row 2&quot;: &quot;c&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;col 2&quot;: {</span>
 <span class="go">        &quot;row 1&quot;: &quot;b&quot;,</span>
 <span class="go">        &quot;row 2&quot;: &quot;d&quot;</span>
 <span class="go">    }</span>
 <span class="go">}</span>

 <span class="go">Encoding/decoding a Dataframe using ``&#39;values&#39;`` formatted JSON:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;values&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">[</span>
 <span class="go">    [</span>
 <span class="go">        &quot;a&quot;,</span>
 <span class="go">        &quot;b&quot;</span>
 <span class="go">    ],</span>
 <span class="go">    [</span>
 <span class="go">        &quot;c&quot;,</span>
 <span class="go">        &quot;d&quot;</span>
 <span class="go">    ]</span>
 <span class="go">]</span>

 <span class="go">Encoding with Table Schema:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;table&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
 <span class="go">{</span>
 <span class="go">    &quot;schema&quot;: {</span>
 <span class="go">        &quot;fields&quot;: [</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;index&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            },</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;col 1&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            },</span>
 <span class="go">            {</span>
 <span class="go">                &quot;name&quot;: &quot;col 2&quot;,</span>
 <span class="go">                &quot;type&quot;: &quot;string&quot;</span>
 <span class="go">            }</span>
 <span class="go">        ],</span>
 <span class="go">        &quot;primaryKey&quot;: [</span>
 <span class="go">            &quot;index&quot;</span>
 <span class="go">        ],</span>
 <span class="go">        &quot;pandas_version&quot;: &quot;1.4.0&quot;</span>
 <span class="go">    },</span>
 <span class="go">    &quot;data&quot;: [</span>
 <span class="go">        {</span>
 <span class="go">            &quot;index&quot;: &quot;row 1&quot;,</span>
 <span class="go">            &quot;col 1&quot;: &quot;a&quot;,</span>
 <span class="go">            &quot;col 2&quot;: &quot;b&quot;</span>
 <span class="go">        },</span>
 <span class="go">        {</span>
 <span class="go">            &quot;index&quot;: &quot;row 2&quot;,</span>
 <span class="go">            &quot;col 1&quot;: &quot;c&quot;,</span>
 <span class="go">            &quot;col 2&quot;: &quot;d&quot;</span>
 <span class="go">        }</span>
 <span class="go">    ]</span>
 <span class="go">}</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_html">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_html</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#read_html"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.read_html" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read HTML tables into a <code class="docutils literal notranslate"><span class="pre">list</span></code> of <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> objects.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>io</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a string <code class="docutils literal notranslate"><span class="pre">read()</span></code> function.
 The string can represent a URL or the HTML itself. Note that
 lxml only accepts the http, ftp and file url protocols. If you have a
 URL that starts with <code class="docutils literal notranslate"><span class="pre">'https'</span></code> you might try removing the <code class="docutils literal notranslate"><span class="pre">'s'</span></code>.</li>
 <li><strong>match</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><em>compiled regular expression</em><em>, </em><em>optional</em>) – The set of tables containing text matching this regex or string will be
 returned. Unless the HTML is extremely simple you will probably need to
 pass a non-empty string here. Defaults to ‘.+’ (match any non-empty
 string). The default value will return all tables contained on a page.
 This value is converted to a regular expression so that there is
 consistent behavior between Beautiful Soup and lxml.</li>
 <li><strong>flavor</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – The parsing engine to use. ‘bs4’ and ‘html5lib’ are synonymous with
 each other, they are both there for backwards compatibility. The
 default of <code class="docutils literal notranslate"><span class="pre">None</span></code> tries to use <code class="docutils literal notranslate"><span class="pre">lxml</span></code> to parse and if that fails it
 falls back on <code class="docutils literal notranslate"><span class="pre">bs4</span></code> + <code class="docutils literal notranslate"><span class="pre">html5lib</span></code>.</li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em> or </em><em>list-like</em><em>, </em><em>optional</em>) – The row (or list of rows for a <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.MultiIndex.html#pandas.MultiIndex" title="(in pandas v3.0.0.dev0+802.g7c836ed2ec)"><code class="xref py py-class docutils literal notranslate"><span class="pre">MultiIndex</span></code></a>) to use to
 make the column headers.</li>
 <li><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em> or </em><em>list-like</em><em>, </em><em>optional</em>) – The column (or list of columns) to use to create the index.</li>
 <li><strong>skiprows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>list-like</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#slice" title="(in Python v3.12)"><em>slice</em></a><em>, </em><em>optional</em>) – Number of rows to skip after parsing the column integer. 0-based. If a
 sequence of integers or a slice is given, will skip the rows indexed by
 that sequence.  Note that a single element sequence means ‘skip the nth
 row’ whereas an integer means ‘skip n rows’.</li>
 <li><strong>attrs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>This is a dictionary of attributes that you can pass to identify
 the table in the HTML. These are not checked for validity before being
 passed to lxml or Beautiful Soup. However, these attributes must be
 valid HTML table attributes to work correctly. For example,</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">attrs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;id&#39;</span><span class="p">:</span> <span class="s1">&#39;table&#39;</span><span class="p">}</span>
 </pre></div>
 </div>
 <p>is a valid attribute dictionary because the ‘id’ HTML tag attribute is
 a valid HTML attribute for <em>any</em> HTML tag as per <a class="reference external" href="https://html.spec.whatwg.org/multipage/dom.html#global-attributes">this document</a>.</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">attrs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;asdf&#39;</span><span class="p">:</span> <span class="s1">&#39;table&#39;</span><span class="p">}</span>
 </pre></div>
 </div>
 <p>is <em>not</em> a valid attribute dictionary because ‘asdf’ is not a valid
 HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
 table attributes can be found <a class="reference external" href="http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2">here</a>. A
 working draft of the HTML 5 spec can be found <a class="reference external" href="https://html.spec.whatwg.org/multipage/tables.html">here</a>. It contains the
 latest information on table attributes for the modern web.</p>
 </li>
 <li><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>optional</em>) – See <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a> for more details.</li>
 <li><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Separator to use to parse thousands. Defaults to <code class="docutils literal notranslate"><span class="pre">','</span></code>.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – The encoding used to decode the web page. Defaults to <code class="docutils literal notranslate"><span class="pre">None</span></code>. <code class="docutils literal notranslate"><span class="pre">None</span></code>
 preserves the previous encoding behavior, which depends on the
 underlying parser library (e.g., the parser library will try to use
 the encoding provided by the document).</li>
 <li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character to recognize as decimal point (e.g. use ‘,’ for European
 data).</li>
 <li><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default None</em>) – Dict of functions for converting values in certain columns. Keys can
 either be integers or column labels, values are functions that take one
 input argument, the cell (not column) content, and return the
 transformed content.</li>
 <li><strong>na_values</strong> (<em>iterable</em><em>, </em><em>default None</em>) – Custom NA values.</li>
 <li><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – If na_values are specified and keep_default_na is False the default NaN
 values are overridden, otherwise they’re appended to.</li>
 <li><strong>displayed_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether elements with “display: none” should be parsed.</li>
 <li><strong>extract_links</strong> (<em>{None</em><em>, </em><em>&quot;all&quot;</em><em>, </em><em>&quot;header&quot;</em><em>, </em><em>&quot;body&quot;</em><em>, </em><em>&quot;footer&quot;}</em>) – <p>Table elements in the specified section(s) with &lt;a&gt; tags will have their
 href extracted.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A list of DeferredDataFrames.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dfs</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Before using this function you should read the <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-html-gotchas" title="(in pandas v3.0.0.dev0+802.g7c836ed2ec)"><span class="xref std std-ref">gotchas about the
 HTML parsing libraries</span></a>.</p>
 <p>Expect to do some cleanup after you call this function. For example, you
 might need to manually assign column names if the column names are
 converted to NaN when you pass the <cite>header=0</cite> argument. We try to assume as
 little as possible about the structure of the table and push the
 idiosyncrasies of the HTML contained in the table to the user.</p>
 <p>This function searches for <code class="docutils literal notranslate"><span class="pre">&lt;table&gt;</span></code> elements and only for <code class="docutils literal notranslate"><span class="pre">&lt;tr&gt;</span></code>
 and <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code> rows and <code class="docutils literal notranslate"><span class="pre">&lt;td&gt;</span></code> elements within each <code class="docutils literal notranslate"><span class="pre">&lt;tr&gt;</span></code> or <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code>
 element in the table. <code class="docutils literal notranslate"><span class="pre">&lt;td&gt;</span></code> stands for “table data”. This function
 attempts to properly handle <code class="docutils literal notranslate"><span class="pre">colspan</span></code> and <code class="docutils literal notranslate"><span class="pre">rowspan</span></code> attributes.
 If the table has a <code class="docutils literal notranslate"><span class="pre">&lt;thead&gt;</span></code> element, it is used to construct
 the header, otherwise the function attempts to find the header within
 the body (by putting rows with only <code class="docutils literal notranslate"><span class="pre">&lt;th&gt;</span></code> elements into the header).</p>
 <p>Similar to <a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a> the <cite>header</cite> argument is applied
 <strong>after</strong> <cite>skiprows</cite> is applied.</p>
 <p>This function will <em>always</em> return a list of <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> <em>or</em>
 it will fail, e.g., it will <em>not</em> return an empty list.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <p>See the <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-read-html">read_html documentation in the IO section of the pandas docs</a>
 for some examples of reading in HTML tables.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_html">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_html</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#to_html"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.to_html" title="Permalink to this definition">¶</a></dt>
 <dd><p>Render a DataFrame as an HTML table.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>Path</em><em> or </em><em>StringIO-like</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Buffer to write to. If None, the output is returned as a string.</li>
 <li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – The subset of columns to write. Writes all columns by default.</li>
 <li><strong>col_space</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em> or </em><em>dict of int</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – The minimum width of each column in CSS length units.  An int is assumed to be px units.</li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>optional</em>) – Whether to print column labels, default True.</li>
 <li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Whether to print index (row) labels.</li>
 <li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'NaN'</em>) – String representation of <code class="docutils literal notranslate"><span class="pre">NaN</span></code> to use.</li>
 <li><strong>formatters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.12)"><em>tuple</em></a><em> or </em><em>dict of one-param. functions</em><em>, </em><em>optional</em>) – Formatter functions to apply to columns’ elements by position or
 name.
 The result of each function must be a unicode string.
 List/tuple must be of length equal to the number of columns.</li>
 <li><strong>float_format</strong> (<em>one-parameter function</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – <p>Formatter function to apply to columns’ elements if they are
 floats. This function must return a unicode string and will be
 applied only to the non-<code class="docutils literal notranslate"><span class="pre">NaN</span></code> elements, with <code class="docutils literal notranslate"><span class="pre">NaN</span></code> being
 handled by <code class="docutils literal notranslate"><span class="pre">na_rep</span></code>.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>sparsify</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Set to False for a DeferredDataFrame with a hierarchical index to print
 every multiindex key at each row.</li>
 <li><strong>index_names</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Prints the names of the indexes.</li>
 <li><strong>justify</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – <p>How to justify the column labels. If None uses the option from
 the print configuration (controlled by set_option), ‘right’ out
 of the box. Valid values are</p>
 <ul>
 <li>left</li>
 <li>right</li>
 <li>center</li>
 <li>justify</li>
 <li>justify-all</li>
 <li>start</li>
 <li>end</li>
 <li>inherit</li>
 <li>match-parent</li>
 <li>initial</li>
 <li>unset</li>
 </ul>
 </li>
 <li><strong>max_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of rows to display in the console.</li>
 <li><strong>max_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of columns to display in the console.</li>
 <li><strong>show_dimensions</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Display DeferredDataFrame dimensions (number of rows by number of columns).</li>
 <li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator, e.g. ‘,’ in Europe.</li>
 <li><strong>bold_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Make the row labels bold in the output.</li>
 <li><strong>classes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.12)"><em>tuple</em></a><em>, </em><em>default None</em>) – CSS class(es) to apply to the resulting html table.</li>
 <li><strong>escape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert the characters &lt;, &gt;, and &amp; to HTML-safe sequences.</li>
 <li><strong>notebook</strong> (<em>{True</em><em>, </em><em>False}</em><em>, </em><em>default False</em>) – Whether the generated HTML is for IPython Notebook.</li>
 <li><strong>border</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a>) – A <code class="docutils literal notranslate"><span class="pre">border=border</span></code> attribute is included in the opening
 <cite>&lt;table&gt;</cite> tag. Default <code class="docutils literal notranslate"><span class="pre">pd.options.display.html.border</span></code>.</li>
 <li><strong>table_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – A css id is included in the opening <cite>&lt;table&gt;</cite> tag if specified.</li>
 <li><strong>render_links</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Convert URLs to HTML links.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default &quot;utf-8&quot;</em>) – <p>Set character encoding.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If buf is None, returns the result as a string. Otherwise returns
 None.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)">str</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)">None</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">to_string()</span></code></dt>
 <dd>Convert DeferredDataFrame to a string.</dd>
 </dl>
 </div>
 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.dataframe.io.ReadViaPandas">
 <em class="property">class </em><code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">ReadViaPandas</code><span class="sig-paren">(</span><em>format</em>, <em>*args</em>, <em>include_indexes=False</em>, <em>objects_as_strings=True</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#ReadViaPandas"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.ReadViaPandas" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
 <dl class="method">
 <dt id="apache_beam.dataframe.io.ReadViaPandas.expand">
 <code class="descname">expand</code><span class="sig-paren">(</span><em>p</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#ReadViaPandas.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.ReadViaPandas.expand" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.dataframe.io.WriteViaPandas">
 <em class="property">class </em><code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">WriteViaPandas</code><span class="sig-paren">(</span><em>format</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#WriteViaPandas"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.WriteViaPandas" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
 <dl class="method">
 <dt id="apache_beam.dataframe.io.WriteViaPandas.expand">
 <code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/io.html#WriteViaPandas.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.io.WriteViaPandas.expand" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_excel">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_excel</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_excel" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read an Excel file into a pandas DataFrame.</p>
 <p>Supports <cite>xls</cite>, <cite>xlsx</cite>, <cite>xlsm</cite>, <cite>xlsb</cite>, <cite>odf</cite>, <cite>ods</cite> and <cite>odt</cite> file extensions
 read from a local filesystem or URL. Supports an option to read
 a single sheet or a list of sheets.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>io</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#bytes" title="(in Python v3.12)"><em>bytes</em></a><em>, </em><em>ExcelFile</em><em>, </em><em>xlrd.Book</em><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.xlsx</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </li>
 <li><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a><em>, </em><em>default 0</em>) – <p>Strings are used for sheet names. Integers are used in zero-indexed
 sheet positions (chart sheets do not count as a sheet position).
 Lists of strings/integers are used to request multiple sheets.
 Specify None to get all worksheets.</p>
 <p>Available cases:</p>
 <ul>
 <li>Defaults to <code class="docutils literal notranslate"><span class="pre">0</span></code>: 1st sheet as a <cite>DeferredDataFrame</cite></li>
 <li><code class="docutils literal notranslate"><span class="pre">1</span></code>: 2nd sheet as a <cite>DeferredDataFrame</cite></li>
 <li><code class="docutils literal notranslate"><span class="pre">&quot;Sheet1&quot;</span></code>: Load sheet with name “Sheet1”</li>
 <li><code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1,</span> <span class="pre">&quot;Sheet5&quot;]</span></code>: Load first, second and sheet named “Sheet5”
 as a dict of <cite>DeferredDataFrame</cite></li>
 <li>None: All worksheets.</li>
 </ul>
 </li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>list of int</em><em>, </em><em>default 0</em>) – Row (0-indexed) to use for the column labels of the parsed
 DeferredDataFrame. If a list of integers is passed those row positions will
 be combined into a <code class="docutils literal notranslate"><span class="pre">MultiIndex</span></code>. Use None if there is no header.</li>
 <li><strong>names</strong> (<em>array-like</em><em>, </em><em>default None</em>) – List of column names to use. If file contains no header row,
 then you should explicitly pass header=None.</li>
 <li><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>list of int</em><em>, </em><em>default None</em>) – <p>Column (0-indexed) to use as the row labels of the DeferredDataFrame.
 Pass None if there is no such column.  If a list is passed,
 those columns will be combined into a <code class="docutils literal notranslate"><span class="pre">MultiIndex</span></code>.  If a
 subset of data is selected with <code class="docutils literal notranslate"><span class="pre">usecols</span></code>, index_col
 is based on the subset.</p>
 <p>Missing values will be forward filled to allow roundtripping with
 <code class="docutils literal notranslate"><span class="pre">to_excel</span></code> for <code class="docutils literal notranslate"><span class="pre">merged_cells=True</span></code>. To avoid forward filling the
 missing values use <code class="docutils literal notranslate"><span class="pre">set_index</span></code> after reading the data instead of
 <code class="docutils literal notranslate"><span class="pre">index_col</span></code>.</p>
 </li>
 <li><strong>usecols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>list-like</em><em>, or </em><em>callable</em><em>, </em><em>default None</em>) – <ul>
 <li>If None, then parse all columns.</li>
 <li>If str, then indicates comma separated list of Excel column letters
 and column ranges (e.g. “A:E” or “A,C,E:F”). Ranges are inclusive of
 both sides.</li>
 <li>If list of int, then indicates list of column numbers to be parsed
 (0-indexed).</li>
 <li>If list of string, then indicates list of column names to be parsed.</li>
 <li>If callable, then evaluate each column name against it and parse the
 column if the callable returns <code class="docutils literal notranslate"><span class="pre">True</span></code>.</li>
 </ul>
 <p>Returns a subset of the columns according to behavior above.</p>
 </li>
 <li><strong>dtype</strong> (<em>Type name</em><em> or </em><em>dict of column -&gt; type</em><em>, </em><em>default None</em>) – Data type for data or columns. E.g. {‘a’: np.float64, ‘b’: np.int32}
 Use <cite>object</cite> to preserve data as stored in Excel and not interpret dtype.
 If converters are specified, they will be applied INSTEAD
 of dtype conversion.</li>
 <li><strong>engine</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – <p>If io is not a buffer or path, this must be set to identify io.
 Supported engines: “xlrd”, “openpyxl”, “odf”, “pyxlsb”.
 Engine compatibility:</p>
 <ul>
 <li>“xlrd” supports old-style Excel files (.xls).</li>
 <li>“openpyxl” supports newer Excel file formats.</li>
 <li>“odf” supports OpenDocument file formats (.odf, .ods, .odt).</li>
 <li>“pyxlsb” supports Binary Excel files.</li>
 </ul>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0: </span>The engine <a class="reference external" href="https://xlrd.readthedocs.io/en/latest/">xlrd</a>
  now only supports old-style <code class="docutils literal notranslate"><span class="pre">.xls</span></code> files.
  When <code class="docutils literal notranslate"><span class="pre">engine=None</span></code>, the following logic will be
  used to determine the engine:</p>
 <ul>
 <li>If <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is an OpenDocument format (.odf, .ods, .odt),
 then <a class="reference external" href="https://pypi.org/project/odfpy/">odf</a> will be used.</li>
 <li>Otherwise if <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is an xls format,
 <code class="docutils literal notranslate"><span class="pre">xlrd</span></code> will be used.</li>
 <li>Otherwise if <code class="docutils literal notranslate"><span class="pre">path_or_buffer</span></code> is in xlsb format,
 <code class="docutils literal notranslate"><span class="pre">pyxlsb</span></code> will be used.<div class="versionadded">
 <p><span class="versionmodified">New in version 1.3.0.</span></p>
 </div>
 </li>
 <li>Otherwise <code class="docutils literal notranslate"><span class="pre">openpyxl</span></code> will be used.<div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.3.0.</span></p>
 </div>
 </li>
 </ul>
 </div>
 </li>
 <li><strong>converters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default None</em>) – Dict of functions for converting values in certain columns. Keys can
 either be integers or column labels, values are functions that take one
 input argument, the Excel cell content, and return the transformed
 content.</li>
 <li><strong>true_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>default None</em>) – Values to consider as True.</li>
 <li><strong>false_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>default None</em>) – Values to consider as False.</li>
 <li><strong>skiprows</strong> (<em>list-like</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, or </em><em>callable</em><em>, </em><em>optional</em>) – Line numbers to skip (0-indexed) or number of lines to skip (int) at the
 start of the file. If callable, the callable function will be evaluated
 against the row indices, returning True if the row should be skipped and
 False otherwise. An example of a valid callable argument would be <code class="docutils literal notranslate"><span class="pre">lambda</span>
 <span class="pre">x:</span> <span class="pre">x</span> <span class="pre">in</span> <span class="pre">[0,</span> <span class="pre">2]</span></code>.</li>
 <li><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default None</em>) – Number of rows to parse.</li>
 <li><strong>na_values</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>list-like</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default None</em>) – Additional strings to recognize as NA/NaN. If dict passed, specific
 per-column NA values. By default the following values are interpreted
 as NaN: ‘’, ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, ‘-NaN’, ‘-nan’,
 ‘1.#IND’, ‘1.#QNAN’, ‘&lt;NA&gt;’, ‘N/A’, ‘NA’, ‘NULL’, ‘NaN’, ‘None’,
 ‘n/a’, ‘nan’, ‘null’.</li>
 <li><strong>keep_default_na</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>Whether or not to include the default NaN values when parsing the data.
 Depending on whether <cite>na_values</cite> is passed in, the behavior is as follows:</p>
 <ul>
 <li>If <cite>keep_default_na</cite> is True, and <cite>na_values</cite> are specified, <cite>na_values</cite>
 is appended to the default NaN values used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is True, and <cite>na_values</cite> are not specified, only
 the default NaN values are used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is False, and <cite>na_values</cite> are specified, only
 the NaN values specified in <cite>na_values</cite> are used for parsing.</li>
 <li>If <cite>keep_default_na</cite> is False, and <cite>na_values</cite> are not specified, no
 strings will be parsed as NaN.</li>
 </ul>
 <p>Note that if <cite>na_filter</cite> is passed in as False, the <cite>keep_default_na</cite> and
 <cite>na_values</cite> parameters will be ignored.</p>
 </li>
 <li><strong>na_filter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Detect missing value markers (empty strings and the value of na_values). In
 data without any NAs, passing na_filter=False can improve the performance
 of reading a large file.</li>
 <li><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Indicate number of NA values placed in non-numeric columns.</li>
 <li><strong>parse_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>list-like</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default False</em>) – <p>The behavior is as follows:</p>
 <ul>
 <li>bool. If True -&gt; try parsing the index.</li>
 <li>list of int or names. e.g. If [1, 2, 3] -&gt; try parsing columns 1, 2, 3
 each as a separate date column.</li>
 <li>list of lists. e.g.  If [[1, 3]] -&gt; combine columns 1 and 3 and parse as
 a single date column.</li>
 <li>dict, e.g. {‘foo’ : [1, 3]} -&gt; parse columns 1, 3 as date and call
 result ‘foo’</li>
 </ul>
 <p>If a column or index contains an unparsable date, the entire column or
 index will be returned unaltered as an object data type. If you don’t want to
 parse some cells as date just change their type in Excel to “Text”.
 For non-standard datetime parsing, use <code class="docutils literal notranslate"><span class="pre">pd.to_datetime</span></code> after <code class="docutils literal notranslate"><span class="pre">pd.read_excel</span></code>.</p>
 <p>Note: A fast-path exists for iso8601-formatted dates.</p>
 </li>
 <li><strong>date_parser</strong> (<em>function</em><em>, </em><em>optional</em>) – <p>Function to use for converting a sequence of string columns to an array of
 datetime instances. The default uses <code class="docutils literal notranslate"><span class="pre">dateutil.parser.parser</span></code> to do the
 conversion. Pandas will try to call <cite>date_parser</cite> in three different ways,
 advancing to the next if an exception occurs: 1) Pass one or more arrays
 (as defined by <cite>parse_dates</cite>) as arguments; 2) concatenate (row-wise) the
 string values from the columns defined by <cite>parse_dates</cite> into a single array
 and pass that; and 3) call <cite>date_parser</cite> once for each row using one or
 more strings (corresponding to the columns defined by <cite>parse_dates</cite>) as
 arguments.</p>
 <div class="deprecated">
 <p><span class="versionmodified">Deprecated since version 2.0.0: </span>Use <code class="docutils literal notranslate"><span class="pre">date_format</span></code> instead, or read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply
 <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 </div>
 </li>
 <li><strong>date_format</strong> (str or dict of column -&gt; format, default <code class="docutils literal notranslate"><span class="pre">None</span></code>) – <p>If used in conjunction with <code class="docutils literal notranslate"><span class="pre">parse_dates</span></code>, will parse dates according to this
 format. For anything more complex,
 please read in as <code class="docutils literal notranslate"><span class="pre">object</span></code> and then apply <code class="xref py py-func docutils literal notranslate"><span class="pre">to_datetime()</span></code> as-needed.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.0.</span></p>
 </div>
 </li>
 <li><strong>thousands</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – Thousands separator for parsing string columns to numeric.  Note that
 this parameter is only necessary for columns stored as TEXT in Excel,
 any numeric columns will automatically be parsed, regardless of display
 format.</li>
 <li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default '.'</em>) – <p>Character to recognize as decimal point for parsing string columns to numeric.
 Note that this parameter is only necessary for columns stored as TEXT in Excel,
 any numeric columns will automatically be parsed, regardless of display
 format (e.g. use ‘,’ for European data).</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.4.0.</span></p>
 </div>
 </li>
 <li><strong>comment</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default None</em>) – Comments out remainder of line. Pass a character or characters to this
 argument to indicate comments in the input file. Any data between the
 comment string and the end of the current line is ignored.</li>
 <li><strong>skipfooter</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 0</em>) – Rows at the end to skip (0-indexed).</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame from the passed in Excel file. See notes in sheet_name
 argument for more information on when a dict of DeferredDataFrames is returned.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or dict of DeferredDataFrames</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_excel()</span></code></dt>
 <dd>Write DeferredDataFrame to an Excel file.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></dt>
 <dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_fwf" title="apache_beam.dataframe.io.read_fwf"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_fwf()</span></code></a></dt>
 <dd>Read a table of fixed-width formatted lines into DeferredDataFrame.</dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">The file can be read using the file name as string or an open file object:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
 <span class="go">       Name  Value</span>
 <span class="go">0   string1      1</span>
 <span class="go">1   string2      2</span>
 <span class="go">2  #Comment      3</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">),</span>
 <span class="gp">... </span>              <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet3&#39;</span><span class="p">)</span>
 <span class="go">   Unnamed: 0      Name  Value</span>
 <span class="go">0           0   string1      1</span>
 <span class="go">1           1   string2      2</span>
 <span class="go">2           2  #Comment      3</span>

 <span class="go">Index and header can be specified via the `index_col` and `header` arguments</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
 <span class="go">     0         1      2</span>
 <span class="go">0  NaN      Name  Value</span>
 <span class="go">1  0.0   string1      1</span>
 <span class="go">2  1.0   string2      2</span>
 <span class="go">3  2.0  #Comment      3</span>

 <span class="go">Column types are inferred but can be explicitly specified</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">dtype</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;Name&#39;</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="s1">&#39;Value&#39;</span><span class="p">:</span> <span class="nb">float</span><span class="p">})</span>
 <span class="go">       Name  Value</span>
 <span class="go">0   string1    1.0</span>
 <span class="go">1   string2    2.0</span>
 <span class="go">2  #Comment    3.0</span>

 <span class="go">True, False, and NA values, and thousands separators have defaults,</span>
 <span class="go">but can be explicitly specified, too. Supply the values you would like</span>
 <span class="go">as strings or lists of strings!</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">na_values</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;string1&#39;</span><span class="p">,</span> <span class="s1">&#39;string2&#39;</span><span class="p">])</span>
 <span class="go">       Name  Value</span>
 <span class="go">0       NaN      1</span>
 <span class="go">1       NaN      2</span>
 <span class="go">2  #Comment      3</span>

 <span class="go">Comment lines in the excel input file can be skipped using the `comment` kwarg</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="s1">&#39;tmp.xlsx&#39;</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">comment</span><span class="o">=</span><span class="s1">&#39;#&#39;</span><span class="p">)</span>
 <span class="go">      Name  Value</span>
 <span class="go">0  string1    1.0</span>
 <span class="go">1  string2    2.0</span>
 <span class="go">2     None    NaN</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_feather">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_feather</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_feather" title="Permalink to this definition">¶</a></dt>
 <dd><p>Load a feather-format object from the file path.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.feather</span></code>.</li>
 <li><strong>columns</strong> (<em>sequence</em><em>, </em><em>default None</em>) – If not provided, all columns are read.</li>
 <li><strong>use_threads</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to parallelize reading using multiple threads.</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">type of object stored in file</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_parquet">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_parquet</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_parquet" title="Permalink to this definition">¶</a></dt>
 <dd><p>Load a parquet object from the file path, returning a DeferredDataFrame.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function.
 The string could be a URL. Valid URL schemes include http, ftp, s3,
 gs, and file. For file URLs, a host is expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.parquet</span></code>.
 A file URL can also be a path to a directory that contains multiple
 partitioned parquet files. Both pyarrow and fastparquet support
 paths to directories as well as file URLs. A directory path could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/tables</span></code> or <code class="docutils literal notranslate"><span class="pre">s3://bucket/partition_dir</span></code>.</li>
 <li><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – Parquet library to use. If ‘auto’, then the option
 <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
 behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
 ‘pyarrow’ is unavailable.</li>
 <li><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>default=None</em>) – If not None, only these columns will be read from the file.</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.3.0.</span></p>
 </div>
 </li>
 <li><strong>use_nullable_dtypes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, use dtypes that use <code class="docutils literal notranslate"><span class="pre">pd.NA</span></code> as missing value indicator
 for the resulting DeferredDataFrame. (only applicable for the <code class="docutils literal notranslate"><span class="pre">pyarrow</span></code>
 engine)
 As new dtypes are added that support <code class="docutils literal notranslate"><span class="pre">pd.NA</span></code> in the future, the
 output with this option will change to use those dtypes.
 Note: this is an experimental option, and behaviour (e.g. additional
 support dtypes) may change without notice.</p>
 <div class="deprecated">
 <p><span class="versionmodified">Deprecated since version 2.0.</span></p>
 </div>
 </li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 <li><strong>**kwargs</strong> – Any additional kwargs are passed to the engine.</li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_sas">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_sas</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_sas" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read SAS files stored as either XPORT or SAS7BDAT format files.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">read()</span></code> function. The string could be a URL.
 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be:
 <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.sas7bdat</span></code>.</li>
 <li><strong>format</strong> (<em>str {'xport'</em><em>, </em><em>'sas7bdat'}</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a>) – If None, file format is inferred from file extension. If ‘xport’ or
 ‘sas7bdat’, uses the corresponding format.</li>
 <li><strong>index</strong> (<em>identifier of index column</em><em>, </em><em>defaults to None</em>) – Identifier of column that should be used as index of the DeferredDataFrame.</li>
 <li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default is None</em>) – Encoding for text data.  If None, text data are stored as raw bytes.</li>
 <li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a>) – <p>Read file <cite>chunksize</cite> lines at a time, returns iterator.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span><code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> is a context manager.</p>
 </div>
 </li>
 <li><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>defaults to False</em>) – <p>If True, returns an iterator for reading the file incrementally.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2: </span><code class="docutils literal notranslate"><span class="pre">TextFileReader</span></code> is a context manager.</p>
 </div>
 </li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><em>DeferredDataFrame if iterator=False and chunksize=None, else SAS7BDATReader or XportReader</em></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_spss">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_spss</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_spss" title="Permalink to this definition">¶</a></dt>
 <dd><p>Load an SPSS file from the file path, returning a DataFrame.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><em>Path</em>) – File path.</li>
 <li><strong>usecols</strong> (<em>list-like</em><em>, </em><em>optional</em>) – Return a subset of the columns. If None, return all columns.</li>
 <li><strong>convert_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default is True</em>) – Convert categorical columns into pd.Categorical.</li>
 <li><strong>dtype_backend</strong> (<em>{&quot;numpy_nullable&quot;</em><em>, </em><em>&quot;pyarrow&quot;}</em><em>, </em><em>defaults to NumPy backed DeferredDataFrames</em>) – <p>Which dtype_backend to use, e.g. whether a DeferredDataFrame should have NumPy
 arrays. Nullable dtypes are used for all dtypes that have a nullable
 implementation when “numpy_nullable” is set; pyarrow is used for all
 dtypes if “pyarrow” is set.</p>
 <p>The dtype_backends are still experimental.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.read_stata">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">read_stata</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.read_stata" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read Stata file into DataFrame.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>filepath_or_buffer</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em> or </em><em>file-like object</em>) – <p>Any valid string path is acceptable. The string could be a URL. Valid
 URL schemes include http, ftp, s3, and file. For file URLs, a host is
 expected. A local file could be: <code class="docutils literal notranslate"><span class="pre">file://localhost/path/to/table.dta</span></code>.</p>
 <p>If you want to pass in a path object, pandas accepts any <code class="docutils literal notranslate"><span class="pre">os.PathLike</span></code>.</p>
 <p>By file-like object, we refer to objects with a <code class="docutils literal notranslate"><span class="pre">read()</span></code> method,
 such as a file handle (e.g. via builtin <code class="docutils literal notranslate"><span class="pre">open</span></code> function)
 or <code class="docutils literal notranslate"><span class="pre">StringIO</span></code>.</p>
 </li>
 <li><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert date variables to DeferredDataFrame time values.</li>
 <li><strong>convert_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Read value labels and convert columns to Categorical/Factor variables.</li>
 <li><strong>index_col</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Column to set as index.</li>
 <li><strong>convert_missing</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Flag indicating whether to convert missing values to their Stata
 representations.  If False, missing values are replaced with nan.
 If True, columns containing missing values are returned with
 object data types and missing values are represented by
 StataMissingValue objects.</li>
 <li><strong>preserve_dtypes</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Preserve Stata datatypes. If False, numeric data are upcast to pandas
 default types for foreign data (float64 or int64).</li>
 <li><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a>) – Columns to retain.  Columns will be returned in the given order.  None
 returns all columns.</li>
 <li><strong>order_categoricals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Flag indicating whether converted categorical data are ordered.</li>
 <li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default None</em>) – Return StataReader object for iterations, returns chunks with
 given number of lines.</li>
 <li><strong>iterator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default False</em>) – Return StataReader object.</li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly decompression of on-disk data. If ‘infer’ and ‘filepath_or_buffer’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 If using ‘zip’ or ‘tar’, the archive must contain only one data file to be read in.
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no decompression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdDecompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for Zstandard decompression using a
 custom compression dictionary:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'zstd',</span> <span class="pre">'dict_data':</span> <span class="pre">my_compression_dict}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 </li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="apache_beam.dataframe.frames.html#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or StataReader</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">io.stata.StataReader()</span></code></dt>
 <dd>Low-level reader for Stata data files.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_stata()</span></code></dt>
 <dd>Export Stata data files.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>Categorical variables read through an iterator may not have the same
 categories and dtype. This occurs when a variable stored in a DTA
 file is associated with an incomplete set of value labels that only
 label a strict subset of the values.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Creating a dummy Stata file for this example</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                    <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>

 <span class="go">Read a Stata dta file:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>

 <span class="go">Read a Stata dta file in 10,000 line chunks:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="p">(</span><span class="mi">20_000</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;uint8&quot;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;i&quot;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;filename.dta&#39;</span><span class="p">)</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_stata</span><span class="p">(</span><span class="s1">&#39;filename.dta&#39;</span><span class="p">,</span> <span class="n">chunksize</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span> <span class="k">as</span> <span class="n">itr</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">itr</span><span class="p">:</span>
 <span class="gp">... </span>        <span class="c1"># Operate on a single chunk, e.g., chunk.mean()</span>
 <span class="gp">... </span>        <span class="k">pass</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_excel">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_excel</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_excel" title="Permalink to this definition">¶</a></dt>
 <dd><p>Write object to an Excel sheet.</p>
 <p>To write a single object to an Excel .xlsx file it is only necessary to
 specify a target file name. To write to multiple sheets it is necessary to
 create an <cite>ExcelWriter</cite> object with a target file name, and specify a sheet
 in the file to write to.</p>
 <p>Multiple sheets may be written to by specifying unique <cite>sheet_name</cite>.
 With all data written to the file it is necessary to save the changes.
 Note that creating an <cite>ExcelWriter</cite> object with a file name that already
 exists will result in the contents of the existing file being erased.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>excel_writer</strong> (<em>path-like</em><em>, </em><em>file-like</em><em>, or </em><em>ExcelWriter object</em>) – File path or existing ExcelWriter.</li>
 <li><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'Sheet1'</em>) – Name of sheet which will contain DeferredDataFrame.</li>
 <li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
 <li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Format string for floating point numbers. For example
 <code class="docutils literal notranslate"><span class="pre">float_format=&quot;%.2f&quot;</span></code> will format 0.1234 to 0.12.</li>
 <li><strong>columns</strong> (<em>sequence</em><em> or </em><em>list of str</em><em>, </em><em>optional</em>) – Columns to write.</li>
 <li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of string is given it is
 assumed to be aliases for the column names.</li>
 <li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
 <li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><em>sequence</em><em>, </em><em>optional</em>) – Column label for index column(s) if desired. If not specified, and
 <cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
 sequence should be given if the DeferredDataFrame uses MultiIndex.</li>
 <li><strong>startrow</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell row to dump data frame.</li>
 <li><strong>startcol</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.12)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell column to dump data frame.</li>
 <li><strong>engine</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – Write engine to use, ‘openpyxl’ or ‘xlsxwriter’. You can also set this
 via the options <code class="docutils literal notranslate"><span class="pre">io.excel.xlsx.writer</span></code> or
 <code class="docutils literal notranslate"><span class="pre">io.excel.xlsm.writer</span></code>.</li>
 <li><strong>merge_cells</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default True</em>) – Write MultiIndex and Hierarchical Rows as merged cells.</li>
 <li><strong>inf_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>default 'inf'</em>) – Representation for infinity (there is no native representation for
 infinity in Excel).</li>
 <li><strong>freeze_panes</strong> (<em>tuple of int</em><em> (</em><em>length 2</em><em>)</em><em>, </em><em>optional</em>) – Specifies the one-based bottommost row and rightmost column that
 is to be frozen.</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options refer to the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.to_csv" title="apache_beam.dataframe.io.to_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">to_csv()</span></code></a></dt>
 <dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">ExcelWriter()</span></code></dt>
 <dd>Class for writing DeferredDataFrame objects into excel sheets.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_excel" title="apache_beam.dataframe.io.read_excel"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_excel()</span></code></a></dt>
 <dd>Read an Excel file into a DeferredDataFrame.</dd>
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_csv" title="apache_beam.dataframe.io.read_csv"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_csv()</span></code></a></dt>
 <dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">io.formats.style.Styler.to_excel()</span></code></dt>
 <dd>Add styles to Excel sheet.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>For compatibility with <code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code>,
 to_excel serializes lists and dicts to strings before writing.</p>
 <p>Once a workbook has been saved it is not possible to write further
 data without rewriting the whole workbook.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create, write to and save a workbook:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
 <span class="gp">... </span>                   <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">)</span>

 <span class="go">To specify the sheet name:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">,</span>
 <span class="gp">... </span>             <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>

 <span class="go">If you wish to write to more than one sheet in the workbook, it is</span>
 <span class="go">necessary to specify an ExcelWriter object:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">df1</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
 <span class="gp">... </span>    <span class="n">df2</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_2&#39;</span><span class="p">)</span>

 <span class="go">ExcelWriter can also be used to append to an existing Excel file:</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                    <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
 <span class="gp">... </span>    <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_3&#39;</span><span class="p">)</span>

 <span class="go">To set the library that is used to write the Excel file,</span>
 <span class="go">you can pass the `engine` keyword (the default engine is</span>
 <span class="go">automatically chosen depending on the file extension):</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s1">&#39;output1.xlsx&#39;</span><span class="p">,</span> <span class="n">engine</span><span class="o">=</span><span class="s1">&#39;xlsxwriter&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_feather">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_feather</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_feather" title="Permalink to this definition">¶</a></dt>
 <dd><p>Write a DataFrame to the binary Feather format.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function. If a string or a path,
 it will be used as Root Directory path when writing a partitioned dataset.</li>
 <li><strong>**kwargs</strong> – <p>Additional keywords passed to <code class="xref py py-func docutils literal notranslate"><span class="pre">pyarrow.feather.write_feather()</span></code>.
 Starting with pyarrow 0.17, this includes the <cite>compression</cite>,
 <cite>compression_level</cite>, <cite>chunksize</cite> and <cite>version</cite> keywords.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.1.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <p class="rubric">Notes</p>
 <p>This function writes the dataframe as a <a class="reference external" href="https://arrow.apache.org/docs/python/feather.html">feather file</a>. Requires a default
 index. For saving the DeferredDataFrame with your custom index use a method that
 supports custom indices e.g. <cite>to_parquet</cite>.</p>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_parquet">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_parquet</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_parquet" title="Permalink to this definition">¶</a></dt>
 <dd><p>Write a DataFrame to the binary parquet format.</p>
 <p>This function writes the dataframe as a <a class="reference external" href="https://parquet.apache.org/">parquet file</a>. You can choose different parquet
 backends, and have the option of compression. See
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v3.0.0.dev0+802.g7c836ed2ec)"><span class="xref std std-ref">the user guide</span></a> for more details.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, </em><em>file-like object</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><em>None</em></a><em>, </em><em>default None</em>) – <p>String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function. If None, the result is
 returned as bytes. If a string or path, it will be used as Root Directory
 path when writing a partitioned dataset.</p>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.2.0: </span>Previously this was “fname”.</p>
 </div>
 </li>
 <li><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – Parquet library to use. If ‘auto’, then the option
 <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
 behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
 ‘pyarrow’ is unavailable.</li>
 <li><strong>compression</strong> (<em>{'snappy'</em><em>, </em><em>'gzip'</em><em>, </em><em>'brotli'</em><em>, </em><em>None}</em><em>, </em><em>default 'snappy'</em>) – Name of the compression to use. Use <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.</li>
 <li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a><em>, </em><em>default None</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, include the dataframe’s index(es) in the file output.
 If <code class="docutils literal notranslate"><span class="pre">False</span></code>, they will not be written to the file.
 If <code class="docutils literal notranslate"><span class="pre">None</span></code>, similar to <code class="docutils literal notranslate"><span class="pre">True</span></code> the dataframe’s index(es)
 will be saved. However, instead of being saved as values,
 the RangeIndex will be stored as a range in the metadata so it
 doesn’t require much space and is faster. Other indexes will
 be included as columns in the file output.</li>
 <li><strong>partition_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Column names by which to partition the dataset.
 Columns are partitioned in the order they are given.
 Must be None if path is not a string.</li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options see the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>**kwargs</strong> – Additional arguments passed to the parquet library. See
 <a class="reference external" href="http://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v3.0.0.dev0+802.g7c836ed2ec)"><span class="xref std std-ref">pandas io</span></a> for more details.</li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The result as bytes if no path argument is provided; otherwise None.</p>
 </td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bytes or None</p>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_parquet" title="apache_beam.dataframe.io.read_parquet"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_parquet()</span></code></a></dt>
 <dd>Read a parquet file.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_orc()</span></code></dt>
 <dd>Write an orc file.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></dt>
 <dd>Write a csv file.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_sql()</span></code></dt>
 <dd>Write to a sql table.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">DeferredDataFrame.to_hdf()</span></code></dt>
 <dd>Write to hdf.</dd>
 </dl>
 </div>
 <p class="rubric">Notes</p>
 <p>This function requires either the <a class="reference external" href="https://pypi.org/project/fastparquet">fastparquet</a> or <a class="reference external" href="https://arrow.apache.org/docs/python/">pyarrow</a> library.</p>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">,</span>
 <span class="gp">... </span>              <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;gzip&#39;</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">)</span>
 <span class="go">   col1  col2</span>
 <span class="go">0     1     3</span>
 <span class="go">1     2     4</span>

 <span class="go">If you want to get a buffer to the parquet content you can use a io.BytesIO</span>
 <span class="go">object, as long as you don&#39;t use partition_cols, which creates multiple files.</span>

 <span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">io</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">BytesIO</span><span class="p">()</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
 <span class="go">0</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
 </pre></div>
 </div>
 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.dataframe.io.to_stata">
 <code class="descclassname">apache_beam.dataframe.io.</code><code class="descname">to_stata</code><span class="sig-paren">(</span><em>df</em>, <em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.io.to_stata" title="Permalink to this definition">¶</a></dt>
 <dd><p>Export DataFrame object to Stata dta format.</p>
 <p>Writes the DataFrame to a Stata dataset file.
 “dta” files contain a Stata dataset.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
 <li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>path object</em><em>, or </em><em>buffer</em>) – String, path object (implementing <code class="docutils literal notranslate"><span class="pre">os.PathLike[str]</span></code>), or file-like
 object implementing a binary <code class="docutils literal notranslate"><span class="pre">write()</span></code> function.</li>
 <li><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a>) – Dictionary mapping columns containing datetime types to stata
 internal format to use when writing the dates. Options are ‘tc’,
 ‘td’, ‘tm’, ‘tw’, ‘th’, ‘tq’, ‘ty’. Column can be either an integer
 or a name. Datetime columns that do not have a conversion type
 specified will be converted to ‘tc’. Raises NotImplementedError if
 a datetime column has timezone information.</li>
 <li><strong>write_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.12)"><em>bool</em></a>) – Write the index to Stata dataset.</li>
 <li><strong>byteorder</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a>) – Can be “&gt;”, “&lt;”, “little”, or “big”. Default is <cite>sys.byteorder</cite>.</li>
 <li><strong>time_stamp</strong> (<em>datetime</em>) – A datetime to use as file creation date.  Default is the current
 time.</li>
 <li><strong>data_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em>, </em><em>optional</em>) – A label for the data set.  Must be 80 characters or smaller.</li>
 <li><strong>variable_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a>) – Dictionary containing columns as keys and variable labels as
 values. Each label must be 80 characters or smaller.</li>
 <li><strong>version</strong> (<em>{114</em><em>, </em><em>117</em><em>, </em><em>118</em><em>, </em><em>119</em><em>, </em><em>None}</em><em>, </em><em>default 114</em>) – <p>Version to use in the output dta file. Set to None to let pandas
 decide between the 118 and 119 formats depending on the number of
 columns in the frame. Version 114 can be read by Stata 10 and
 later. Version 117 can be read by Stata 13 or later. Version 118
 is supported in Stata 14 and later. Version 119 is supported in
 Stata 15 and later. Version 114 limits string variables to 244
 characters or fewer while versions 117 and later allow strings
 with lengths up to 2,000,000 characters. Versions 118 and 119
 support Unicode characters, and version 119 supports more than
 32,767 variables.</p>
 <p>Version 119 should usually only be used when the number of
 variables exceeds the capacity of dta format 118. Exporting
 smaller datasets in format 119 may have unintended consequences,
 and, as of November 2020, Stata SE cannot read version 119 files.</p>
 </li>
 <li><strong>convert_strl</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.12)"><em>list</em></a><em>, </em><em>optional</em>) – List of names of string columns to convert to Stata StrL
 format. Only available if version is 117.  Storing strings in the
 StrL format can produce smaller dta files if strings have more than
 8 characters and values are repeated.</li>
 <li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.12)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output data. If ‘infer’ and ‘path’ is
 path-like, then detect compression from the following extensions: ‘.gz’,
 ‘.bz2’, ‘.zip’, ‘.xz’, ‘.zst’, ‘.tar’, ‘.tar.gz’, ‘.tar.xz’ or ‘.tar.bz2’
 (otherwise no compression).
 Set to <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.
 Can also be a dict with key <code class="docutils literal notranslate"><span class="pre">'method'</span></code> set
 to one of {<code class="docutils literal notranslate"><span class="pre">'zip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'gzip'</span></code>, <code class="docutils literal notranslate"><span class="pre">'bz2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'zstd'</span></code>, <code class="docutils literal notranslate"><span class="pre">'tar'</span></code>} and other
 key-value pairs are forwarded to
 <code class="docutils literal notranslate"><span class="pre">zipfile.ZipFile</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip.GzipFile</span></code>,
 <code class="docutils literal notranslate"><span class="pre">bz2.BZ2File</span></code>, <code class="docutils literal notranslate"><span class="pre">zstandard.ZstdCompressor</span></code> or
 <code class="docutils literal notranslate"><span class="pre">tarfile.TarFile</span></code>, respectively.
 As an example, the following could be passed for faster compression and to create
 a reproducible gzip archive:
 <code class="docutils literal notranslate"><span class="pre">compression={'method':</span> <span class="pre">'gzip',</span> <span class="pre">'compresslevel':</span> <span class="pre">1,</span> <span class="pre">'mtime':</span> <span class="pre">1}</span></code>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.5.0: </span>Added support for <cite>.tar</cite> files.</p>
 </div>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.1.0.</span></p>
 </div>
 <div class="versionchanged">
 <p><span class="versionmodified">Changed in version 1.4.0: </span>Zstandard support.</p>
 </div>
 </li>
 <li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.12)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
 host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
 are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib.request.Request</span></code> as header options. For other
 URLs (e.g. starting with “s3://”, and “gcs://”) the key-value pairs are
 forwarded to <code class="docutils literal notranslate"><span class="pre">fsspec.open</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more
 details, and for more examples on storage options see the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files">pandas user guide section on reading and writing remote files</a>.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.2.0.</span></p>
 </div>
 </li>
 <li><strong>value_labels</strong> (<em>dict of dicts</em>) – <p>Dictionary containing columns as keys and dictionaries of column value
 to labels as values. Labels for a single variable must be 32,000
 characters or smaller.</p>
 <div class="versionadded">
 <p><span class="versionmodified">New in version 1.4.0.</span></p>
 </div>
 </li>
 </ul>
 </td>
 </tr>
 <tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last">
 <li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.12)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">NotImplementedError</span></code></a> –</p>
 <ul class="simple">
 <li>If datetimes contain timezone information</li>
 <li>Column dtype is not representable in Stata</li>
 </ul>
 </li>
 <li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.12)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> –</p>
 <ul class="simple">
 <li>Columns listed in convert_dates are neither datetime64[ns]
 nor datetime.datetime</li>
 <li>Column listed in convert_dates is not in DeferredDataFrame</li>
 <li>Categorical label contains more than 32,000 characters</li>
 </ul>
 </li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <p class="rubric">Differences from pandas</p>
 <p>This operation has no known divergences from the pandas API.</p>
 <div class="admonition seealso">
 <p class="first admonition-title">See also</p>
 <dl class="last docutils">
 <dt><a class="reference internal" href="#apache_beam.dataframe.io.read_stata" title="apache_beam.dataframe.io.read_stata"><code class="xref py py-func docutils literal notranslate"><span class="pre">read_stata()</span></code></a></dt>
 <dd>Import Stata data files.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">io.stata.StataWriter()</span></code></dt>
 <dd>Low-level writer for Stata data files.</dd>
 <dt><code class="xref py py-func docutils literal notranslate"><span class="pre">io.stata.StataWriter117()</span></code></dt>
 <dd>Low-level writer for version 117 dta files.</dd>
 </dl>
 </div>
 <p class="rubric">Examples</p>
 <p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
 <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span>
 <span class="gp">... </span>                              <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
 <span class="gp">... </span>                   <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
 <span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 </dd></dl>

 </div>
 </div>


            </div>

           </div>
           <footer>

     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">

         <a href="apache_beam.dataframe.pandas_top_level_functions.html" class="btn btn-neutral float-right" title="apache_beam.dataframe.pandas_top_level_functions module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>


         <a href="apache_beam.dataframe.frames.html" class="btn btn-neutral float-left" title="apache_beam.dataframe.frames module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>

     </div>


   <hr/>

   <div role="contentinfo">
     <p>
         &copy; Copyright

     </p>
   </div>
   Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.

 </footer>

         </div>
       </div>

     </section>

   </div>


   <script type="text/javascript">
       jQuery(function () {
           SphinxRtdTheme.Navigation.enable(true);
       });
   </script>


 </body>
 </html>