Source code for pyspark.sql.readwriter

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">cast</span><span class="p">,</span> <span class="n">overload</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">Union</span>
<span class="kn">from</span> <span class="nn">pyspark.util</span> <span class="kn">import</span> <span class="n">is_remote_only</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.column</span> <span class="kn">import</span> <span class="n">Column</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="n">StructType</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">utils</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.utils</span> <span class="kn">import</span> <span class="n">to_str</span>
<span class="kn">from</span> <span class="nn">pyspark.errors</span> <span class="kn">import</span> <span class="n">PySparkTypeError</span><span class="p">,</span> <span class="n">PySparkValueError</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span>
<span class="kn">from</span> <span class="nn">pyspark.core.rdd</span> <span class="kn">import</span> <span class="n">RDD</span>
<span class="kn">from</span> <span class="nn">pyspark.sql._typing</span> <span class="kn">import</span> <span class="n">OptionalPrimitiveType</span><span class="p">,</span> <span class="n">ColumnOrName</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.session</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.streaming</span> <span class="kn">import</span> <span class="n">StreamingQuery</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;DataFrameReader&quot;</span><span class="p">,</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">,</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">]</span>
<span class="n">PathOrPaths</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
<span class="n">TupleOrListOfString</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="o">...</span><span class="p">]]</span>
<span class="k">class</span> <span class="nc">OptionUtils</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">_set_opts</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set named options (filter out those the value is None)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">options</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">v</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<div class="viewcode-block" id="DataFrameReader"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.html#pyspark.sql.DataFrameReader">[docs]</a><span class="k">class</span> <span class="nc">DataFrameReader</span><span class="p">(</span><span class="n">OptionUtils</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interface used to load a :class:`DataFrame` from external storage systems</span>
<span class="sd"> (e.g. file systems, key-value stores, etc). Use :attr:`SparkSession.read`</span>
<span class="sd"> to access this.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">spark</span><span class="p">:</span> <span class="s2">&quot;SparkSession&quot;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">spark</span>
<span class="k">def</span> <span class="nf">_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">jdf</span><span class="p">:</span> <span class="s2">&quot;JavaObject&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">jdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="p">)</span>
<div class="viewcode-block" id="DataFrameReader.format"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.format.html#pyspark.sql.DataFrameReader.format">[docs]</a> <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameReader&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Specifies the input data source format.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> source : str</span>
<span class="sd"> string, name of the data source, e.g. &#39;json&#39;, &#39;parquet&#39;.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.read.format(&#39;json&#39;)</span>
<span class="sd"> &lt;...readwriter.DataFrameReader object ...&gt;</span>
<span class="sd"> Write a DataFrame into a JSON file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;format&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the JSON file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&#39;json&#39;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameReader.schema"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.schema.html#pyspark.sql.DataFrameReader.schema">[docs]</a> <span class="k">def</span> <span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameReader&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Specifies the input schema.</span>
<span class="sd"> Some data sources (e.g. JSON) can infer the input schema automatically from data.</span>
<span class="sd"> By specifying the schema here, the underlying data source can skip the schema</span>
<span class="sd"> inference step, and thus speed up data loading.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> schema : :class:`pyspark.sql.types.StructType` or str</span>
<span class="sd"> a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string</span>
<span class="sd"> (For example ``col0 INT, col1 DOUBLE``).</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.read.schema(&quot;col0 INT, col1 DOUBLE&quot;)</span>
<span class="sd"> &lt;...readwriter.DataFrameReader object ...&gt;</span>
<span class="sd"> Specify the schema with reading a CSV file.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;schema&quot;) as d:</span>
<span class="sd"> ... spark.read.schema(&quot;col0 INT, col1 DOUBLE&quot;).format(&quot;csv&quot;).load(d).printSchema()</span>
<span class="sd"> root</span>
<span class="sd"> |-- col0: integer (nullable = true)</span>
<span class="sd"> |-- col1: double (nullable = true)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">_getActiveSessionOrCreate</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">StructType</span><span class="p">):</span>
<span class="n">jschema</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="o">.</span><span class="n">parseDataType</span><span class="p">(</span><span class="n">schema</span><span class="o">.</span><span class="n">json</span><span class="p">())</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">jschema</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_STR_OR_STRUCT&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;schema&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameReader.option"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.option.html#pyspark.sql.DataFrameReader.option">[docs]</a> <span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameReader&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds an input option for the underlying data source.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> key : str</span>
<span class="sd"> The key for the option to set.</span>
<span class="sd"> value</span>
<span class="sd"> The value for the option to set.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.read.option(&quot;key&quot;, &quot;value&quot;)</span>
<span class="sd"> &lt;...readwriter.DataFrameReader object ...&gt;</span>
<span class="sd"> Specify the option &#39;nullValue&#39; with reading a CSV file.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;option&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file</span>
<span class="sd"> ... df = spark.createDataFrame([{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}])</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;.</span>
<span class="sd"> ... spark.read.schema(df.schema).option(</span>
<span class="sd"> ... &quot;nullValue&quot;, &quot;Hyukjin Kwon&quot;).format(&#39;csv&#39;).load(d).show()</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |age|name|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |100|NULL|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">to_str</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameReader.options"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.options.html#pyspark.sql.DataFrameReader.options">[docs]</a> <span class="k">def</span> <span class="nf">options</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameReader&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds input options for the underlying data source.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> **options : dict</span>
<span class="sd"> The dictionary of string keys and prmitive-type values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.read.options(key=&quot;value&quot;)</span>
<span class="sd"> &lt;...readwriter.DataFrameReader object ...&gt;</span>
<span class="sd"> Specify options in a dictionary.</span>
<span class="sd"> &gt;&gt;&gt; spark.read.options(**{&quot;k1&quot;: &quot;v1&quot;, &quot;k2&quot;: &quot;v2&quot;})</span>
<span class="sd"> &lt;...readwriter.DataFrameReader object ...&gt;</span>
<span class="sd"> Specify the option &#39;nullValue&#39; and &#39;header&#39; with reading a CSV file.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;options&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file with a header.</span>
<span class="sd"> ... df = spark.createDataFrame([{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}])</span>
<span class="sd"> ... df.write.option(&quot;header&quot;, True).mode(&quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;,</span>
<span class="sd"> ... # and &#39;header&#39; option set to `True`.</span>
<span class="sd"> ... spark.read.options(</span>
<span class="sd"> ... nullValue=&quot;Hyukjin Kwon&quot;,</span>
<span class="sd"> ... header=True</span>
<span class="sd"> ... ).format(&#39;csv&#39;).load(d).show()</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |age|name|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |100|NULL|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">options</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">to_str</span><span class="p">(</span><span class="n">options</span><span class="p">[</span><span class="n">k</span><span class="p">]))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameReader.load"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.load.html#pyspark.sql.DataFrameReader.load">[docs]</a> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">PathOrPaths</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="nb">format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Loads data from a data source and returns it as a :class:`DataFrame`.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str or list, optional</span>
<span class="sd"> optional string or a list of string for file-system backed data sources.</span>
<span class="sd"> format : str, optional</span>
<span class="sd"> optional string for format of the data source. Default to &#39;parquet&#39;.</span>
<span class="sd"> schema : :class:`pyspark.sql.types.StructType` or str, optional</span>
<span class="sd"> optional :class:`pyspark.sql.types.StructType` for the input schema</span>
<span class="sd"> or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).</span>
<span class="sd"> **options : dict</span>
<span class="sd"> all other string options</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Load a CSV file with format, schema and options specified.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;load&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file with a header</span>
<span class="sd"> ... df = spark.createDataFrame([{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}])</span>
<span class="sd"> ... df.write.option(&quot;header&quot;, True).mode(&quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;,</span>
<span class="sd"> ... # and &#39;header&#39; option set to `True`.</span>
<span class="sd"> ... df = spark.read.load(</span>
<span class="sd"> ... d, schema=df.schema, format=&quot;csv&quot;, nullValue=&quot;Hyukjin Kwon&quot;, header=True)</span>
<span class="sd"> ... df.printSchema()</span>
<span class="sd"> ... df.show()</span>
<span class="sd"> root</span>
<span class="sd"> |-- age: long (nullable = true)</span>
<span class="sd"> |-- name: string (nullable = true)</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |age|name|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |100|NULL|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">format</span><span class="p">)</span>
<span class="k">if</span> <span class="n">schema</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="o">**</span><span class="n">options</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
<span class="k">elif</span> <span class="n">path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">list</span><span class="p">:</span>
<span class="n">path</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="p">]</span> <span class="c1"># type: ignore[list-item]</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">path</span><span class="p">)))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">load</span><span class="p">())</span></div>
<div class="viewcode-block" id="DataFrameReader.json"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.json.html#pyspark.sql.DataFrameReader.json">[docs]</a> <span class="k">def</span> <span class="nf">json</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="s2">&quot;RDD[str]&quot;</span><span class="p">],</span>
<span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">primitivesAsString</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">prefersDecimal</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowComments</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowUnquotedFieldNames</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowSingleQuotes</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowNumericLeadingZero</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowBackslashEscapingAnyCharacter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multiLine</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowUnquotedControlChars</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dropFieldIfAllNull</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowNonNumericNumbers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads JSON files and returns the results as a :class:`DataFrame`.</span>
<span class="sd"> `JSON Lines &lt;http://jsonlines.org/&gt;`_ (newline-delimited JSON) is supported by default.</span>
<span class="sd"> For JSON (one record per file), set the ``multiLine`` parameter to ``true``.</span>
<span class="sd"> If the ``schema`` parameter is not specified, this function goes</span>
<span class="sd"> through the input once to determine the input schema.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str, list or :class:`RDD`</span>
<span class="sd"> string represents path to the JSON dataset, or a list of paths,</span>
<span class="sd"> or RDD of Strings storing JSON objects.</span>
<span class="sd"> schema : :class:`pyspark.sql.types.StructType` or str, optional</span>
<span class="sd"> an optional :class:`pyspark.sql.types.StructType` for the input schema or</span>
<span class="sd"> a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Example 1: Write a DataFrame into a JSON file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;json1&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the JSON file as a DataFrame.</span>
<span class="sd"> ... spark.read.json(d).show()</span>
<span class="sd"> +---+-------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+-------+</span>
<span class="sd"> |100|Hyukjin|</span>
<span class="sd"> +---+-------+</span>
<span class="sd"> Example 2: Read JSON from multiple files in a directory</span>
<span class="sd"> &gt;&gt;&gt; from tempfile import TemporaryDirectory</span>
<span class="sd"> &gt;&gt;&gt; with TemporaryDirectory(prefix=&quot;json2&quot;) as d1, TemporaryDirectory(prefix=&quot;json3&quot;) as d2:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 30, &quot;name&quot;: &quot;Bob&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d1)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the JSON files as a DataFrame.</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 25, &quot;name&quot;: &quot;Alice&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d2)</span>
<span class="sd"> ... spark.read.json([d1, d2]).show()</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> | 25|Alice|</span>
<span class="sd"> | 30| Bob|</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> Example 3: Read JSON with a custom schema</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;json4&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 30, &quot;name&quot;: &quot;Bob&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d)</span>
<span class="sd"> ... custom_schema = &quot;name STRING, age INT&quot;</span>
<span class="sd"> ... spark.read.json(d, schema=custom_schema).show()</span>
<span class="sd"> +----+---+</span>
<span class="sd"> |name|age|</span>
<span class="sd"> +----+---+</span>
<span class="sd"> | Bob| 30|</span>
<span class="sd"> +----+---+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span>
<span class="n">primitivesAsString</span><span class="o">=</span><span class="n">primitivesAsString</span><span class="p">,</span>
<span class="n">prefersDecimal</span><span class="o">=</span><span class="n">prefersDecimal</span><span class="p">,</span>
<span class="n">allowComments</span><span class="o">=</span><span class="n">allowComments</span><span class="p">,</span>
<span class="n">allowUnquotedFieldNames</span><span class="o">=</span><span class="n">allowUnquotedFieldNames</span><span class="p">,</span>
<span class="n">allowSingleQuotes</span><span class="o">=</span><span class="n">allowSingleQuotes</span><span class="p">,</span>
<span class="n">allowNumericLeadingZero</span><span class="o">=</span><span class="n">allowNumericLeadingZero</span><span class="p">,</span>
<span class="n">allowBackslashEscapingAnyCharacter</span><span class="o">=</span><span class="n">allowBackslashEscapingAnyCharacter</span><span class="p">,</span>
<span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="o">=</span><span class="n">columnNameOfCorruptRecord</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">multiLine</span><span class="o">=</span><span class="n">multiLine</span><span class="p">,</span>
<span class="n">allowUnquotedControlChars</span><span class="o">=</span><span class="n">allowUnquotedControlChars</span><span class="p">,</span>
<span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="o">=</span><span class="n">samplingRatio</span><span class="p">,</span>
<span class="n">dropFieldIfAllNull</span><span class="o">=</span><span class="n">dropFieldIfAllNull</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="o">=</span><span class="n">pathGlobFilter</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="o">=</span><span class="n">recursiveFileLookup</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="o">=</span><span class="n">modifiedBefore</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="o">=</span><span class="n">modifiedAfter</span><span class="p">,</span>
<span class="n">allowNonNumericNumbers</span><span class="o">=</span><span class="n">allowNonNumericNumbers</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">path</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">json</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">path</span><span class="p">)))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">():</span>
<span class="kn">from</span> <span class="nn">pyspark.core.rdd</span> <span class="kn">import</span> <span class="n">RDD</span> <span class="c1"># noqa: F401</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">()</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">func</span><span class="p">(</span><span class="n">iterator</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">:</span>
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">iterator</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="k">yield</span> <span class="n">x</span>
<span class="n">keyed</span> <span class="o">=</span> <span class="n">path</span><span class="o">.</span><span class="n">mapPartitions</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="n">keyed</span><span class="o">.</span><span class="n">_bypass_serializer</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">jrdd</span> <span class="o">=</span> <span class="n">keyed</span><span class="o">.</span><span class="n">_jrdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">BytesToString</span><span class="p">())</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">json</span><span class="p">(</span><span class="n">jrdd</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_STR_OR_LIST_OF_RDD&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;path&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameReader.table"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.table.html#pyspark.sql.DataFrameReader.table">[docs]</a> <span class="k">def</span> <span class="nf">table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns the specified table as a :class:`DataFrame`.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> tableName : str</span>
<span class="sd"> string, name of the table.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = spark.range(10)</span>
<span class="sd"> &gt;&gt;&gt; df.createOrReplaceTempView(&#39;tblA&#39;)</span>
<span class="sd"> &gt;&gt;&gt; spark.read.table(&#39;tblA&#39;).show()</span>
<span class="sd"> +---+</span>
<span class="sd"> | id|</span>
<span class="sd"> +---+</span>
<span class="sd"> | 0|</span>
<span class="sd"> | 1|</span>
<span class="sd"> | 2|</span>
<span class="sd"> | 3|</span>
<span class="sd"> | 4|</span>
<span class="sd"> | 5|</span>
<span class="sd"> | 6|</span>
<span class="sd"> | 7|</span>
<span class="sd"> | 8|</span>
<span class="sd"> | 9|</span>
<span class="sd"> +---+</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE tblA&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="n">tableName</span><span class="p">))</span></div>
<div class="viewcode-block" id="DataFrameReader.parquet"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.parquet.html#pyspark.sql.DataFrameReader.parquet">[docs]</a> <span class="k">def</span> <span class="nf">parquet</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">paths</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads Parquet files, returning the result as a :class:`DataFrame`.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> paths : str</span>
<span class="sd"> One or more file paths to read the Parquet files from.</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> **options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :class:`DataFrame`</span>
<span class="sd"> A DataFrame containing the data from the Parquet files.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Create sample dataframes.</span>
<span class="sd"> &gt;&gt;&gt; df = spark.createDataFrame(</span>
<span class="sd"> ... [(10, &quot;Alice&quot;), (15, &quot;Bob&quot;), (20, &quot;Tom&quot;)], schema=[&quot;age&quot;, &quot;name&quot;])</span>
<span class="sd"> &gt;&gt;&gt; df2 = spark.createDataFrame([(70, &quot;Alice&quot;), (80, &quot;Bob&quot;)], schema=[&quot;height&quot;, &quot;name&quot;])</span>
<span class="sd"> Write a DataFrame into a Parquet file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;parquet1&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a Parquet file.</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.parquet(d).orderBy(&quot;name&quot;).show()</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> | 10|Alice|</span>
<span class="sd"> | 15| Bob|</span>
<span class="sd"> | 20| Tom|</span>
<span class="sd"> +---+-----+</span>
<span class="sd"> Read a Parquet file with a specific column.</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;parquet2&quot;) as d:</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file with only the &#39;name&#39; column.</span>
<span class="sd"> ... spark.read.schema(&quot;name string&quot;).parquet(d).orderBy(&quot;name&quot;).show()</span>
<span class="sd"> +-----+</span>
<span class="sd"> | name|</span>
<span class="sd"> +-----+</span>
<span class="sd"> |Alice|</span>
<span class="sd"> | Bob|</span>
<span class="sd"> | Tom|</span>
<span class="sd"> +-----+</span>
<span class="sd"> Read multiple Parquet files and merge schema.</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;parquet3&quot;) as d1:</span>
<span class="sd"> ... with tempfile.TemporaryDirectory(prefix=&quot;parquet4&quot;) as d2:</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d1)</span>
<span class="sd"> ... df2.write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d2)</span>
<span class="sd"> ...</span>
<span class="sd"> ... spark.read.option(</span>
<span class="sd"> ... &quot;mergeSchema&quot;, &quot;true&quot;</span>
<span class="sd"> ... ).parquet(d1, d2).select(</span>
<span class="sd"> ... &quot;name&quot;, &quot;age&quot;, &quot;height&quot;</span>
<span class="sd"> ... ).orderBy(&quot;name&quot;, &quot;age&quot;).show()</span>
<span class="sd"> +-----+----+------+</span>
<span class="sd"> | name| age|height|</span>
<span class="sd"> +-----+----+------+</span>
<span class="sd"> |Alice|NULL| 70|</span>
<span class="sd"> |Alice| 10| NULL|</span>
<span class="sd"> | Bob|NULL| 80|</span>
<span class="sd"> | Bob| 15| NULL|</span>
<span class="sd"> | Tom| 20| NULL|</span>
<span class="sd"> +-----+----+------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span>
<span class="n">mergeSchema</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;mergeSchema&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">pathGlobFilter</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;pathGlobFilter&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">modifiedBefore</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;modifiedBefore&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">modifiedAfter</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;modifiedAfter&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">recursiveFileLookup</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;recursiveFileLookup&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">datetimeRebaseMode</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;datetimeRebaseMode&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">int96RebaseMode</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;int96RebaseMode&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">mergeSchema</span><span class="o">=</span><span class="n">mergeSchema</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="o">=</span><span class="n">pathGlobFilter</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="o">=</span><span class="n">recursiveFileLookup</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="o">=</span><span class="n">modifiedBefore</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="o">=</span><span class="n">modifiedAfter</span><span class="p">,</span>
<span class="n">datetimeRebaseMode</span><span class="o">=</span><span class="n">datetimeRebaseMode</span><span class="p">,</span>
<span class="n">int96RebaseMode</span><span class="o">=</span><span class="n">int96RebaseMode</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">parquet</span><span class="p">(</span><span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="n">paths</span><span class="p">)))</span></div>
<div class="viewcode-block" id="DataFrameReader.text"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.text.html#pyspark.sql.DataFrameReader.text">[docs]</a> <span class="k">def</span> <span class="nf">text</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">paths</span><span class="p">:</span> <span class="n">PathOrPaths</span><span class="p">,</span>
<span class="n">wholetext</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads text files and returns a :class:`DataFrame` whose schema starts with a</span>
<span class="sd"> string column named &quot;value&quot;, and followed by partitioned columns if there</span>
<span class="sd"> are any.</span>
<span class="sd"> The text files must be encoded as UTF-8.</span>
<span class="sd"> By default, each line in the text file is a new row in the resulting DataFrame.</span>
<span class="sd"> .. versionadded:: 1.6.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> paths : str or list</span>
<span class="sd"> string, or list of strings, for input path(s).</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a text file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;text&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a text file</span>
<span class="sd"> ... df = spark.createDataFrame([(&quot;a&quot;,), (&quot;b&quot;,), (&quot;c&quot;,)], schema=[&quot;alphabets&quot;])</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;text&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the text file as a DataFrame.</span>
<span class="sd"> ... spark.read.schema(df.schema).text(d).sort(&quot;alphabets&quot;).show()</span>
<span class="sd"> +---------+</span>
<span class="sd"> |alphabets|</span>
<span class="sd"> +---------+</span>
<span class="sd"> | a|</span>
<span class="sd"> | b|</span>
<span class="sd"> | c|</span>
<span class="sd"> +---------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">wholetext</span><span class="o">=</span><span class="n">wholetext</span><span class="p">,</span>
<span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="o">=</span><span class="n">pathGlobFilter</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="o">=</span><span class="n">recursiveFileLookup</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="o">=</span><span class="n">modifiedBefore</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="o">=</span><span class="n">modifiedAfter</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">paths</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">paths</span> <span class="o">=</span> <span class="p">[</span><span class="n">paths</span><span class="p">]</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">paths</span><span class="p">)))</span></div>
<div class="viewcode-block" id="DataFrameReader.csv"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.csv.html#pyspark.sql.DataFrameReader.csv">[docs]</a> <span class="k">def</span> <span class="nf">csv</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">PathOrPaths</span><span class="p">,</span>
<span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">sep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">quote</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">escape</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">comment</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inferSchema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreLeadingWhiteSpace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreTrailingWhiteSpace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nullValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nanValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">positiveInf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">negativeInf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">maxColumns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">maxCharsPerColumn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">maxMalformedLogPerPartition</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multiLine</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">charToEscapeQuoteEscaping</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">enforceSchema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">emptyValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">unescapedQuoteHandling</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Loads a CSV file and returns the result as a :class:`DataFrame`.</span>
<span class="sd"> This function will go through the input once to determine the input schema if</span>
<span class="sd"> ``inferSchema`` is enabled. To avoid going through the entire data once, disable</span>
<span class="sd"> ``inferSchema`` option or specify the schema explicitly using ``schema``.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str or list</span>
<span class="sd"> string, or list of strings, for input path(s),</span>
<span class="sd"> or RDD of Strings storing CSV rows.</span>
<span class="sd"> schema : :class:`pyspark.sql.types.StructType` or str, optional</span>
<span class="sd"> an optional :class:`pyspark.sql.types.StructType` for the input schema</span>
<span class="sd"> or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a CSV file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;csv&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file</span>
<span class="sd"> ... df = spark.createDataFrame([{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}])</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;.</span>
<span class="sd"> ... spark.read.csv(d, schema=df.schema, nullValue=&quot;Hyukjin Kwon&quot;).show()</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |age|name|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |100|NULL|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span>
<span class="n">sep</span><span class="o">=</span><span class="n">sep</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">quote</span><span class="o">=</span><span class="n">quote</span><span class="p">,</span>
<span class="n">escape</span><span class="o">=</span><span class="n">escape</span><span class="p">,</span>
<span class="n">comment</span><span class="o">=</span><span class="n">comment</span><span class="p">,</span>
<span class="n">header</span><span class="o">=</span><span class="n">header</span><span class="p">,</span>
<span class="n">inferSchema</span><span class="o">=</span><span class="n">inferSchema</span><span class="p">,</span>
<span class="n">ignoreLeadingWhiteSpace</span><span class="o">=</span><span class="n">ignoreLeadingWhiteSpace</span><span class="p">,</span>
<span class="n">ignoreTrailingWhiteSpace</span><span class="o">=</span><span class="n">ignoreTrailingWhiteSpace</span><span class="p">,</span>
<span class="n">nullValue</span><span class="o">=</span><span class="n">nullValue</span><span class="p">,</span>
<span class="n">nanValue</span><span class="o">=</span><span class="n">nanValue</span><span class="p">,</span>
<span class="n">positiveInf</span><span class="o">=</span><span class="n">positiveInf</span><span class="p">,</span>
<span class="n">negativeInf</span><span class="o">=</span><span class="n">negativeInf</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">maxColumns</span><span class="o">=</span><span class="n">maxColumns</span><span class="p">,</span>
<span class="n">maxCharsPerColumn</span><span class="o">=</span><span class="n">maxCharsPerColumn</span><span class="p">,</span>
<span class="n">maxMalformedLogPerPartition</span><span class="o">=</span><span class="n">maxMalformedLogPerPartition</span><span class="p">,</span>
<span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="o">=</span><span class="n">columnNameOfCorruptRecord</span><span class="p">,</span>
<span class="n">multiLine</span><span class="o">=</span><span class="n">multiLine</span><span class="p">,</span>
<span class="n">charToEscapeQuoteEscaping</span><span class="o">=</span><span class="n">charToEscapeQuoteEscaping</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="o">=</span><span class="n">samplingRatio</span><span class="p">,</span>
<span class="n">enforceSchema</span><span class="o">=</span><span class="n">enforceSchema</span><span class="p">,</span>
<span class="n">emptyValue</span><span class="o">=</span><span class="n">emptyValue</span><span class="p">,</span>
<span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">,</span>
<span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="o">=</span><span class="n">pathGlobFilter</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="o">=</span><span class="n">recursiveFileLookup</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="o">=</span><span class="n">modifiedBefore</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="o">=</span><span class="n">modifiedAfter</span><span class="p">,</span>
<span class="n">unescapedQuoteHandling</span><span class="o">=</span><span class="n">unescapedQuoteHandling</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">path</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">path</span><span class="p">)))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">():</span>
<span class="kn">from</span> <span class="nn">pyspark.core.rdd</span> <span class="kn">import</span> <span class="n">RDD</span> <span class="c1"># noqa: F401</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">()</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">func</span><span class="p">(</span><span class="n">iterator</span><span class="p">):</span>
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">iterator</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="k">yield</span> <span class="n">x</span>
<span class="n">keyed</span> <span class="o">=</span> <span class="n">path</span><span class="o">.</span><span class="n">mapPartitions</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="n">keyed</span><span class="o">.</span><span class="n">_bypass_serializer</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">jrdd</span> <span class="o">=</span> <span class="n">keyed</span><span class="o">.</span><span class="n">_jrdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">BytesToString</span><span class="p">())</span>
<span class="c1"># see SPARK-22112</span>
<span class="c1"># There aren&#39;t any jvm api for creating a dataframe from rdd storing csv.</span>
<span class="c1"># We can do it through creating a jvm dataset firstly and using the jvm api</span>
<span class="c1"># for creating a dataframe from dataset storing csv.</span>
<span class="n">jdataset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="o">.</span><span class="n">createDataset</span><span class="p">(</span>
<span class="n">jrdd</span><span class="o">.</span><span class="n">rdd</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">Encoders</span><span class="o">.</span><span class="n">STRING</span><span class="p">()</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">csv</span><span class="p">(</span><span class="n">jdataset</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_STR_OR_LIST_OF_RDD&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;path&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span></div>
<span class="k">def</span> <span class="nf">xml</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="s2">&quot;RDD[str]&quot;</span><span class="p">],</span>
<span class="n">rowTag</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">schema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">StructType</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">excludeAttribute</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">attributePrefix</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">valueTag</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreSurroundingSpaces</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">rowValidationXSDPath</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreNamespace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">wildcardColName</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inferSchema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nullValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multiLine</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Loads a XML file and returns the result as a :class:`DataFrame`.</span>
<span class="sd"> If the ``schema`` parameter is not specified, this function goes</span>
<span class="sd"> through the input once to determine the input schema.</span>
<span class="sd"> .. versionadded:: 4.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str, list or :class:`RDD`</span>
<span class="sd"> string, or list of strings, for input path(s),</span>
<span class="sd"> or RDD of Strings storing XML rows.</span>
<span class="sd"> schema : :class:`pyspark.sql.types.StructType` or str, optional</span>
<span class="sd"> an optional :class:`pyspark.sql.types.StructType` for the input schema</span>
<span class="sd"> or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a XML file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;xml&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a XML file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).option(&quot;rowTag&quot;, &quot;person&quot;).format(&quot;xml&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the XML file as a DataFrame.</span>
<span class="sd"> ... spark.read.option(&quot;rowTag&quot;, &quot;person&quot;).xml(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">rowTag</span><span class="o">=</span><span class="n">rowTag</span><span class="p">,</span>
<span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span>
<span class="n">excludeAttribute</span><span class="o">=</span><span class="n">excludeAttribute</span><span class="p">,</span>
<span class="n">attributePrefix</span><span class="o">=</span><span class="n">attributePrefix</span><span class="p">,</span>
<span class="n">valueTag</span><span class="o">=</span><span class="n">valueTag</span><span class="p">,</span>
<span class="n">ignoreSurroundingSpaces</span><span class="o">=</span><span class="n">ignoreSurroundingSpaces</span><span class="p">,</span>
<span class="n">rowValidationXSDPath</span><span class="o">=</span><span class="n">rowValidationXSDPath</span><span class="p">,</span>
<span class="n">ignoreNamespace</span><span class="o">=</span><span class="n">ignoreNamespace</span><span class="p">,</span>
<span class="n">wildcardColName</span><span class="o">=</span><span class="n">wildcardColName</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">inferSchema</span><span class="o">=</span><span class="n">inferSchema</span><span class="p">,</span>
<span class="n">nullValue</span><span class="o">=</span><span class="n">nullValue</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span>
<span class="n">columnNameOfCorruptRecord</span><span class="o">=</span><span class="n">columnNameOfCorruptRecord</span><span class="p">,</span>
<span class="n">multiLine</span><span class="o">=</span><span class="n">multiLine</span><span class="p">,</span>
<span class="n">samplingRatio</span><span class="o">=</span><span class="n">samplingRatio</span><span class="p">,</span>
<span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">path</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">xml</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">path</span><span class="p">)))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">():</span>
<span class="kn">from</span> <span class="nn">pyspark.core.rdd</span> <span class="kn">import</span> <span class="n">RDD</span> <span class="c1"># noqa: F401</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_remote_only</span><span class="p">()</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">func</span><span class="p">(</span><span class="n">iterator</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">:</span>
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">iterator</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="k">yield</span> <span class="n">x</span>
<span class="n">keyed</span> <span class="o">=</span> <span class="n">path</span><span class="o">.</span><span class="n">mapPartitions</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="n">keyed</span><span class="o">.</span><span class="n">_bypass_serializer</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">jrdd</span> <span class="o">=</span> <span class="n">keyed</span><span class="o">.</span><span class="n">_jrdd</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">BytesToString</span><span class="p">())</span>
<span class="c1"># There isn&#39;t any jvm api for creating a dataframe from rdd storing XML.</span>
<span class="c1"># We can do it through creating a jvm dataset first and using the jvm api</span>
<span class="c1"># for creating a dataframe from dataset storing XML.</span>
<span class="n">jdataset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="o">.</span><span class="n">createDataset</span><span class="p">(</span>
<span class="n">jrdd</span><span class="o">.</span><span class="n">rdd</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">Encoders</span><span class="o">.</span><span class="n">STRING</span><span class="p">()</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">xml</span><span class="p">(</span><span class="n">jdataset</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_STR_OR_LIST_OF_RDD&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;path&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<div class="viewcode-block" id="DataFrameReader.orc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.orc.html#pyspark.sql.DataFrameReader.orc">[docs]</a> <span class="k">def</span> <span class="nf">orc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">PathOrPaths</span><span class="p">,</span>
<span class="n">mergeSchema</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Loads ORC files, returning the result as a :class:`DataFrame`.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str or list</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a ORC file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;orc&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a ORC file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;orc&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.orc(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">mergeSchema</span><span class="o">=</span><span class="n">mergeSchema</span><span class="p">,</span>
<span class="n">pathGlobFilter</span><span class="o">=</span><span class="n">pathGlobFilter</span><span class="p">,</span>
<span class="n">modifiedBefore</span><span class="o">=</span><span class="n">modifiedBefore</span><span class="p">,</span>
<span class="n">modifiedAfter</span><span class="o">=</span><span class="n">modifiedAfter</span><span class="p">,</span>
<span class="n">recursiveFileLookup</span><span class="o">=</span><span class="n">recursiveFileLookup</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">path</span> <span class="o">=</span> <span class="p">[</span><span class="n">path</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">orc</span><span class="p">(</span><span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="n">path</span><span class="p">)))</span></div>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">jdbc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">jdbc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">column</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">lowerBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">upperBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">properties</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">jdbc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">predicates</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span>
<span class="n">properties</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="o">...</span>
<div class="viewcode-block" id="DataFrameReader.jdbc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameReader.jdbc.html#pyspark.sql.DataFrameReader.jdbc">[docs]</a> <span class="k">def</span> <span class="nf">jdbc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">column</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">lowerBound</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">upperBound</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">numPartitions</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">predicates</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">properties</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Construct a :class:`DataFrame` representing the database table named ``table``</span>
<span class="sd"> accessible via JDBC URL ``url`` and connection ``properties``.</span>
<span class="sd"> Partitions of the table will be retrieved in parallel if either ``column`` or</span>
<span class="sd"> ``predicates`` is specified. ``lowerBound``, ``upperBound`` and ``numPartitions``</span>
<span class="sd"> is needed when ``column`` is specified.</span>
<span class="sd"> If both ``column`` and ``predicates`` are specified, ``column`` will be used.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> table : str</span>
<span class="sd"> the name of the table</span>
<span class="sd"> column : str, optional</span>
<span class="sd"> alias of ``partitionColumn`` option. Refer to ``partitionColumn`` in</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> predicates : list, optional</span>
<span class="sd"> a list of expressions suitable for inclusion in WHERE clauses;</span>
<span class="sd"> each one defines one partition of the :class:`DataFrame`</span>
<span class="sd"> properties : dict, optional</span>
<span class="sd"> a dictionary of JDBC database connection arguments. Normally at</span>
<span class="sd"> least properties &quot;user&quot; and &quot;password&quot; with their corresponding values.</span>
<span class="sd"> For example { &#39;user&#39; : &#39;SYSTEM&#39;, &#39;password&#39; : &#39;mypassword&#39; }</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Don&#39;t create too many partitions in parallel on a large cluster;</span>
<span class="sd"> otherwise Spark might crash your external database systems.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :class:`DataFrame`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaClass</span>
<span class="k">if</span> <span class="n">properties</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">properties</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">jprop</span> <span class="o">=</span> <span class="n">JavaClass</span><span class="p">(</span>
<span class="s2">&quot;java.util.Properties&quot;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_gateway</span><span class="o">.</span><span class="n">_gateway_client</span><span class="p">,</span>
<span class="p">)()</span>
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">properties</span><span class="p">:</span>
<span class="n">jprop</span><span class="o">.</span><span class="n">setProperty</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">properties</span><span class="p">[</span><span class="n">k</span><span class="p">])</span>
<span class="k">if</span> <span class="n">column</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">lowerBound</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s2">&quot;lowerBound can not be None when ``column`` is specified&quot;</span>
<span class="k">assert</span> <span class="n">upperBound</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s2">&quot;upperBound can not be None when ``column`` is specified&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">numPartitions</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="p">),</span> <span class="s2">&quot;numPartitions can not be None when ``column`` is specified&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">jdbc</span><span class="p">(</span>
<span class="n">url</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">column</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">lowerBound</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">upperBound</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">numPartitions</span><span class="p">),</span> <span class="n">jprop</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">predicates</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">gateway</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_gateway</span>
<span class="k">assert</span> <span class="n">gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">jpredicates</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">toJArray</span><span class="p">(</span><span class="n">gateway</span><span class="p">,</span> <span class="n">gateway</span><span class="o">.</span><span class="n">jvm</span><span class="o">.</span><span class="n">java</span><span class="o">.</span><span class="n">lang</span><span class="o">.</span><span class="n">String</span><span class="p">,</span> <span class="n">predicates</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">jdbc</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">jpredicates</span><span class="p">,</span> <span class="n">jprop</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_df</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jreader</span><span class="o">.</span><span class="n">jdbc</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">jprop</span><span class="p">))</span></div></div>
<div class="viewcode-block" id="DataFrameWriter"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.html#pyspark.sql.DataFrameWriter">[docs]</a><span class="k">class</span> <span class="nc">DataFrameWriter</span><span class="p">(</span><span class="n">OptionUtils</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interface used to write a :class:`DataFrame` to external storage systems</span>
<span class="sd"> (e.g. file systems, key-value stores, etc). Use :attr:`DataFrame.write`</span>
<span class="sd"> to access this.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">sparkSession</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="o">.</span><span class="n">write</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_sq</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">jsq</span><span class="p">:</span> <span class="s2">&quot;JavaObject&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;StreamingQuery&quot;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.streaming</span> <span class="kn">import</span> <span class="n">StreamingQuery</span>
<span class="k">return</span> <span class="n">StreamingQuery</span><span class="p">(</span><span class="n">jsq</span><span class="p">)</span>
<div class="viewcode-block" id="DataFrameWriter.mode"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.mode.html#pyspark.sql.DataFrameWriter.mode">[docs]</a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">saveMode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Specifies the behavior when data or table already exists.</span>
<span class="sd"> Options include:</span>
<span class="sd"> * `append`: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * `overwrite`: Overwrite existing data.</span>
<span class="sd"> * `error` or `errorifexists`: Throw an exception if data already exists.</span>
<span class="sd"> * `ignore`: Silently ignore this operation if data already exists.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Raise an error when writing to an existing path.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;mode1&quot;) as d:</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 80, &quot;name&quot;: &quot;Xinrong Meng&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;error&quot;).format(&quot;parquet&quot;).save(d) # doctest: +SKIP</span>
<span class="sd"> Traceback (most recent call last):</span>
<span class="sd"> ...</span>
<span class="sd"> ...AnalysisException: ...</span>
<span class="sd"> Write a Parquet file back with various options, and read it back.</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;mode2&quot;) as d:</span>
<span class="sd"> ... # Overwrite the path with a new Parquet file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Append another DataFrame into the Parquet file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 120, &quot;name&quot;: &quot;Takuya Ueshin&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;append&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Append another DataFrame into the Parquet file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 140, &quot;name&quot;: &quot;Haejoon Lee&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;ignore&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.parquet(d).show()</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> |120|Takuya Ueshin|</span>
<span class="sd"> |100| Hyukjin Kwon|</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># At the JVM side, the default value of mode is already set to &quot;error&quot;.</span>
<span class="c1"># So, if the given saveMode is None, we will not call JVM-side&#39;s mode method.</span>
<span class="k">if</span> <span class="n">saveMode</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">saveMode</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriter.format"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.format.html#pyspark.sql.DataFrameWriter.format">[docs]</a> <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Specifies the underlying output data source.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> source : str</span>
<span class="sd"> string, name of the data source, e.g. &#39;json&#39;, &#39;parquet&#39;.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.range(1).write.format(&#39;parquet&#39;)</span>
<span class="sd"> &lt;...readwriter.DataFrameWriter object ...&gt;</span>
<span class="sd"> Write a DataFrame into a Parquet file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;format&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a Parquet file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&#39;parquet&#39;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriter.option"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.option.html#pyspark.sql.DataFrameWriter.option">[docs]</a> <span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds an output option for the underlying data source.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> key : str</span>
<span class="sd"> The key for the option to set.</span>
<span class="sd"> value</span>
<span class="sd"> The value for the option to set.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.range(1).write.option(&quot;key&quot;, &quot;value&quot;)</span>
<span class="sd"> &lt;...readwriter.DataFrameWriter object ...&gt;</span>
<span class="sd"> Specify the option &#39;nullValue&#39; with writing a CSV file.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;option&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;.</span>
<span class="sd"> ... df = spark.createDataFrame([(100, None)], &quot;age INT, name STRING&quot;)</span>
<span class="sd"> ... df.write.option(&quot;nullValue&quot;, &quot;Hyukjin Kwon&quot;).mode(&quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame.</span>
<span class="sd"> ... spark.read.schema(df.schema).format(&#39;csv&#39;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">to_str</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriter.options"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.options.html#pyspark.sql.DataFrameWriter.options">[docs]</a> <span class="k">def</span> <span class="nf">options</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds output options for the underlying data source.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> **options : dict</span>
<span class="sd"> The dictionary of string keys and primitive-type values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; spark.range(1).write.options(key=&quot;value&quot;)</span>
<span class="sd"> &lt;...readwriter.DataFrameWriter object ...&gt;</span>
<span class="sd"> Specify options in a dictionary.</span>
<span class="sd"> &gt;&gt;&gt; spark.range(1).write.options(**{&quot;k1&quot;: &quot;v1&quot;, &quot;k2&quot;: &quot;v2&quot;})</span>
<span class="sd"> &lt;...readwriter.DataFrameWriter object ...&gt;</span>
<span class="sd"> Specify the option &#39;nullValue&#39; and &#39;header&#39; with writing a CSV file.</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.sql.types import StructType,StructField, StringType, IntegerType</span>
<span class="sd"> &gt;&gt;&gt; schema = StructType([</span>
<span class="sd"> ... StructField(&quot;age&quot;,IntegerType(),True),</span>
<span class="sd"> ... StructField(&quot;name&quot;,StringType(),True),</span>
<span class="sd"> ... ])</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;options&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;,</span>
<span class="sd"> ... # and &#39;header&#39; option set to `True`.</span>
<span class="sd"> ... df = spark.createDataFrame([(100, None)], schema=schema)</span>
<span class="sd"> ... df.write.options(nullValue=&quot;Hyukjin Kwon&quot;, header=True).mode(</span>
<span class="sd"> ... &quot;overwrite&quot;).format(&quot;csv&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame.</span>
<span class="sd"> ... spark.read.option(&quot;header&quot;, True).format(&#39;csv&#39;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">options</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">to_str</span><span class="p">(</span><span class="n">options</span><span class="p">[</span><span class="n">k</span><span class="p">]))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">partitionBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">partitionBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<div class="viewcode-block" id="DataFrameWriter.partitionBy"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.partitionBy.html#pyspark.sql.DataFrameWriter.partitionBy">[docs]</a> <span class="k">def</span> <span class="nf">partitionBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Partitions the output by the given columns on the file system.</span>
<span class="sd"> If specified, the output is laid out on the file system similar</span>
<span class="sd"> to Hive&#39;s partitioning scheme.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> cols : str or list</span>
<span class="sd"> name of columns</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a Parquet file in a partitioned manner, and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; import os</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;partitionBy&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a Parquet file in a partitioned manner.</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}, {&quot;age&quot;: 120, &quot;name&quot;: &quot;Ruifeng Zheng&quot;}]</span>
<span class="sd"> ... ).write.partitionBy(&quot;name&quot;).mode(&quot;overwrite&quot;).format(&quot;parquet&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.parquet(d).sort(&quot;age&quot;).show()</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read one partition as a DataFrame.</span>
<span class="sd"> ... spark.read.parquet(f&quot;{d}{os.path.sep}name=Hyukjin Kwon&quot;).show()</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> |100| Hyukjin Kwon|</span>
<span class="sd"> |120|Ruifeng Zheng|</span>
<span class="sd"> +---+-------------+</span>
<span class="sd"> +---+</span>
<span class="sd"> |age|</span>
<span class="sd"> +---+</span>
<span class="sd"> |100|</span>
<span class="sd"> +---+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
<span class="n">cols</span> <span class="o">=</span> <span class="n">cols</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># type: ignore[assignment]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="n">cast</span><span class="p">(</span><span class="n">Iterable</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">],</span> <span class="n">cols</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">bucketBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">bucketBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">TupleOrListOfString</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<div class="viewcode-block" id="DataFrameWriter.bucketBy"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.bucketBy.html#pyspark.sql.DataFrameWriter.bucketBy">[docs]</a> <span class="k">def</span> <span class="nf">bucketBy</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">TupleOrListOfString</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Buckets the output by the given columns. If specified,</span>
<span class="sd"> the output is laid out on the file system similar to Hive&#39;s bucketing scheme,</span>
<span class="sd"> but with a different bucket hash function and is not compatible with Hive&#39;s bucketing.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> numBuckets : int</span>
<span class="sd"> the number of buckets to save</span>
<span class="sd"> col : str, list or tuple</span>
<span class="sd"> a name of a column, or a list of names.</span>
<span class="sd"> cols : str</span>
<span class="sd"> additional names (optional). If `col` is a list it should be empty.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Applicable for file-based data sources in combination with</span>
<span class="sd"> :py:meth:`DataFrameWriter.saveAsTable`.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a Parquet file in a buckted manner, and read it back.</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.sql.functions import input_file_name</span>
<span class="sd"> &gt;&gt;&gt; # Write a DataFrame into a Parquet file in a bucketed manner.</span>
<span class="sd"> ... _ = spark.sql(&quot;DROP TABLE IF EXISTS bucketed_table&quot;)</span>
<span class="sd"> &gt;&gt;&gt; spark.createDataFrame([</span>
<span class="sd"> ... (100, &quot;Hyukjin Kwon&quot;), (120, &quot;Hyukjin Kwon&quot;), (140, &quot;Haejoon Lee&quot;)],</span>
<span class="sd"> ... schema=[&quot;age&quot;, &quot;name&quot;]</span>
<span class="sd"> ... ).write.bucketBy(2, &quot;name&quot;).mode(&quot;overwrite&quot;).saveAsTable(&quot;bucketed_table&quot;)</span>
<span class="sd"> &gt;&gt;&gt; # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.table(&quot;bucketed_table&quot;).sort(&quot;age&quot;).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> |120|Hyukjin Kwon|</span>
<span class="sd"> |140| Haejoon Lee|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE bucketed_table&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">numBuckets</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_INT&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;numBuckets&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">numBuckets</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
<span class="k">if</span> <span class="n">cols</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;CANNOT_SET_TOGETHER&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_list&quot;</span><span class="p">:</span> <span class="sa">f</span><span class="s2">&quot;`col` of type </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2"> and `cols`&quot;</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="n">col</span><span class="p">,</span> <span class="n">cols</span> <span class="o">=</span> <span class="n">col</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">col</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># type: ignore[assignment]</span>
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_LIST_OF_STR&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;cols&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_LIST_OF_STR&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;col&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">bucketBy</span><span class="p">(</span>
<span class="n">numBuckets</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="n">cast</span><span class="p">(</span><span class="n">Iterable</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">],</span> <span class="n">cols</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">sortBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">sortBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">TupleOrListOfString</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="o">...</span>
<div class="viewcode-block" id="DataFrameWriter.sortBy"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.sortBy.html#pyspark.sql.DataFrameWriter.sortBy">[docs]</a> <span class="k">def</span> <span class="nf">sortBy</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">TupleOrListOfString</span><span class="p">],</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Sorts the output in each bucket by the given columns on the file system.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> col : str, tuple or list</span>
<span class="sd"> a name of a column, or a list of names.</span>
<span class="sd"> cols : str</span>
<span class="sd"> additional names (optional). If `col` is a list it should be empty.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a Parquet file in a sorted-buckted manner, and read it back.</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.sql.functions import input_file_name</span>
<span class="sd"> &gt;&gt;&gt; # Write a DataFrame into a Parquet file in a sorted-bucketed manner.</span>
<span class="sd"> ... _ = spark.sql(&quot;DROP TABLE IF EXISTS sorted_bucketed_table&quot;)</span>
<span class="sd"> &gt;&gt;&gt; spark.createDataFrame([</span>
<span class="sd"> ... (100, &quot;Hyukjin Kwon&quot;), (120, &quot;Hyukjin Kwon&quot;), (140, &quot;Haejoon Lee&quot;)],</span>
<span class="sd"> ... schema=[&quot;age&quot;, &quot;name&quot;]</span>
<span class="sd"> ... ).write.bucketBy(1, &quot;name&quot;).sortBy(&quot;age&quot;).mode(</span>
<span class="sd"> ... &quot;overwrite&quot;).saveAsTable(&quot;sorted_bucketed_table&quot;)</span>
<span class="sd"> &gt;&gt;&gt; # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.table(&quot;sorted_bucketed_table&quot;).sort(&quot;age&quot;).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> |120|Hyukjin Kwon|</span>
<span class="sd"> |140| Haejoon Lee|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE sorted_bucketed_table&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
<span class="k">if</span> <span class="n">cols</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;CANNOT_SET_TOGETHER&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_list&quot;</span><span class="p">:</span> <span class="sa">f</span><span class="s2">&quot;`col` of type </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2"> and `cols`&quot;</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="n">col</span><span class="p">,</span> <span class="n">cols</span> <span class="o">=</span> <span class="n">col</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">col</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># type: ignore[assignment]</span>
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_LIST_OF_STR&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;cols&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span>
<span class="n">error_class</span><span class="o">=</span><span class="s2">&quot;NOT_LIST_OF_STR&quot;</span><span class="p">,</span>
<span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;arg_name&quot;</span><span class="p">:</span> <span class="s2">&quot;col&quot;</span><span class="p">,</span>
<span class="s2">&quot;arg_type&quot;</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">sortBy</span><span class="p">(</span>
<span class="n">col</span><span class="p">,</span> <span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="n">cast</span><span class="p">(</span><span class="n">Iterable</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">],</span> <span class="n">cols</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriter.save"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.save.html#pyspark.sql.DataFrameWriter.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="nb">format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">partitionBy</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the contents of the :class:`DataFrame` to a data source.</span>
<span class="sd"> The data source is specified by the ``format`` and a set of ``options``.</span>
<span class="sd"> If ``format`` is not specified, the default data source configured by</span>
<span class="sd"> ``spark.sql.sources.default`` will be used.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str, optional</span>
<span class="sd"> the path in a Hadoop supported file system</span>
<span class="sd"> format : str, optional</span>
<span class="sd"> the format used to save</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> partitionBy : list, optional</span>
<span class="sd"> names of partitioning columns</span>
<span class="sd"> **options : dict</span>
<span class="sd"> all other string options</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a JSON file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;save&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).format(&quot;json&quot;).save(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the JSON file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&#39;json&#39;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="o">**</span><span class="n">options</span><span class="p">)</span>
<span class="k">if</span> <span class="n">partitionBy</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partitionBy</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">format</span><span class="p">)</span>
<span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">save</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.insertInto"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.insertInto.html#pyspark.sql.DataFrameWriter.insertInto">[docs]</a> <span class="k">def</span> <span class="nf">insertInto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tableName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Inserts the content of the :class:`DataFrame` to the specified table.</span>
<span class="sd"> It requires that the schema of the :class:`DataFrame` is the same as the</span>
<span class="sd"> schema of the table.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> overwrite : bool, optional</span>
<span class="sd"> If true, overwrites existing data. Disabled by default</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Unlike :meth:`DataFrameWriter.saveAsTable`, :meth:`DataFrameWriter.insertInto` ignores</span>
<span class="sd"> the column names and just uses position-based resolution.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE IF EXISTS tblA&quot;)</span>
<span class="sd"> &gt;&gt;&gt; df = spark.createDataFrame([</span>
<span class="sd"> ... (100, &quot;Hyukjin Kwon&quot;), (120, &quot;Hyukjin Kwon&quot;), (140, &quot;Haejoon Lee&quot;)],</span>
<span class="sd"> ... schema=[&quot;age&quot;, &quot;name&quot;]</span>
<span class="sd"> ... )</span>
<span class="sd"> &gt;&gt;&gt; df.write.saveAsTable(&quot;tblA&quot;)</span>
<span class="sd"> Insert the data into &#39;tblA&#39; table but with different column names.</span>
<span class="sd"> &gt;&gt;&gt; df.selectExpr(&quot;age AS col1&quot;, &quot;name AS col2&quot;).write.insertInto(&quot;tblA&quot;)</span>
<span class="sd"> &gt;&gt;&gt; spark.read.table(&quot;tblA&quot;).sort(&quot;age&quot;).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> |120|Hyukjin Kwon|</span>
<span class="sd"> |120|Hyukjin Kwon|</span>
<span class="sd"> |140| Haejoon Lee|</span>
<span class="sd"> |140| Haejoon Lee|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE tblA&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">overwrite</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="s2">&quot;overwrite&quot;</span> <span class="k">if</span> <span class="n">overwrite</span> <span class="k">else</span> <span class="s2">&quot;append&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">insertInto</span><span class="p">(</span><span class="n">tableName</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.saveAsTable"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.saveAsTable.html#pyspark.sql.DataFrameWriter.saveAsTable">[docs]</a> <span class="k">def</span> <span class="nf">saveAsTable</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="nb">format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">partitionBy</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` as the specified table.</span>
<span class="sd"> In the case the table already exists, behavior of this function depends on the</span>
<span class="sd"> save mode, specified by the `mode` function (default to throwing an exception).</span>
<span class="sd"> When `mode` is `Overwrite`, the schema of the :class:`DataFrame` does not need to be</span>
<span class="sd"> the same as that of the existing table.</span>
<span class="sd"> * `append`: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * `overwrite`: Overwrite existing data.</span>
<span class="sd"> * `error` or `errorifexists`: Throw an exception if data already exists.</span>
<span class="sd"> * `ignore`: Silently ignore this operation if data already exists.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> When `mode` is `Append`, if there is an existing table, we will use the format and</span>
<span class="sd"> options of the existing table. The column order in the schema of the :class:`DataFrame`</span>
<span class="sd"> doesn&#39;t need to be the same as that of the existing table. Unlike</span>
<span class="sd"> :meth:`DataFrameWriter.insertInto`, :meth:`DataFrameWriter.saveAsTable` will use the</span>
<span class="sd"> column names to find the correct column positions.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> name : str</span>
<span class="sd"> the table name</span>
<span class="sd"> format : str, optional</span>
<span class="sd"> the format used to save</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \</span>
<span class="sd"> (default: error)</span>
<span class="sd"> partitionBy : str or list</span>
<span class="sd"> names of partitioning columns</span>
<span class="sd"> **options : dict</span>
<span class="sd"> all other string options</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Creates a table from a DataFrame, and read it back.</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE IF EXISTS tblA&quot;)</span>
<span class="sd"> &gt;&gt;&gt; spark.createDataFrame([</span>
<span class="sd"> ... (100, &quot;Hyukjin Kwon&quot;), (120, &quot;Hyukjin Kwon&quot;), (140, &quot;Haejoon Lee&quot;)],</span>
<span class="sd"> ... schema=[&quot;age&quot;, &quot;name&quot;]</span>
<span class="sd"> ... ).write.saveAsTable(&quot;tblA&quot;)</span>
<span class="sd"> &gt;&gt;&gt; spark.read.table(&quot;tblA&quot;).sort(&quot;age&quot;).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> |120|Hyukjin Kwon|</span>
<span class="sd"> |140| Haejoon Lee|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &gt;&gt;&gt; _ = spark.sql(&quot;DROP TABLE tblA&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="o">**</span><span class="n">options</span><span class="p">)</span>
<span class="k">if</span> <span class="n">partitionBy</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partitionBy</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">format</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">saveAsTable</span><span class="p">(</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.json"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.json.html#pyspark.sql.DataFrameWriter.json">[docs]</a> <span class="k">def</span> <span class="nf">json</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreNullFields</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` in JSON format</span>
<span class="sd"> (`JSON Lines text format or newline-delimited JSON &lt;http://jsonlines.org/&gt;`_) at the</span>
<span class="sd"> specified path.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a JSON file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;json&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a JSON file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.json(d, mode=&quot;overwrite&quot;)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the JSON file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&quot;json&quot;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">ignoreNullFields</span><span class="o">=</span><span class="n">ignoreNullFields</span><span class="p">,</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">json</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.parquet"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.parquet.html#pyspark.sql.DataFrameWriter.parquet">[docs]</a> <span class="k">def</span> <span class="nf">parquet</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">partitionBy</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` in Parquet format at the specified path.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> partitionBy : str or list, optional</span>
<span class="sd"> names of partitioning columns</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a Parquet file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;parquet&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a Parquet file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.parquet(d, mode=&quot;overwrite&quot;)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&quot;parquet&quot;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span>
<span class="k">if</span> <span class="n">partitionBy</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partitionBy</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span><span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">parquet</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.text"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.text.html#pyspark.sql.DataFrameWriter.text">[docs]</a> <span class="k">def</span> <span class="nf">text</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the DataFrame in a text file at the specified path.</span>
<span class="sd"> The text files will be encoded as UTF-8.</span>
<span class="sd"> .. versionadded:: 1.6.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-text.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The DataFrame must have only one column that is of string type.</span>
<span class="sd"> Each row becomes a new line in the output file.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a text file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;text&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a text file</span>
<span class="sd"> ... df = spark.createDataFrame([(&quot;a&quot;,), (&quot;b&quot;,), (&quot;c&quot;,)], schema=[&quot;alphabets&quot;])</span>
<span class="sd"> ... df.write.mode(&quot;overwrite&quot;).text(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the text file as a DataFrame.</span>
<span class="sd"> ... spark.read.schema(df.schema).format(&quot;text&quot;).load(d).sort(&quot;alphabets&quot;).show()</span>
<span class="sd"> +---------+</span>
<span class="sd"> |alphabets|</span>
<span class="sd"> +---------+</span>
<span class="sd"> | a|</span>
<span class="sd"> | b|</span>
<span class="sd"> | c|</span>
<span class="sd"> +---------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span><span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span> <span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.csv"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.csv.html#pyspark.sql.DataFrameWriter.csv">[docs]</a> <span class="k">def</span> <span class="nf">csv</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">sep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">quote</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">escape</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nullValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">escapeQuotes</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">quoteAll</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreLeadingWhiteSpace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignoreTrailingWhiteSpace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">charToEscapeQuoteEscaping</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">emptyValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">lineSep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` in CSV format at the specified path.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-csv.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a CSV file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;csv&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a CSV file</span>
<span class="sd"> ... df = spark.createDataFrame([{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}])</span>
<span class="sd"> ... df.write.csv(d, mode=&quot;overwrite&quot;)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the CSV file as a DataFrame with &#39;nullValue&#39; option set to &#39;Hyukjin Kwon&#39;.</span>
<span class="sd"> ... spark.read.schema(df.schema).format(&quot;csv&quot;).option(</span>
<span class="sd"> ... &quot;nullValue&quot;, &quot;Hyukjin Kwon&quot;).load(d).show()</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |age|name|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> |100|NULL|</span>
<span class="sd"> +---+----+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
<span class="n">sep</span><span class="o">=</span><span class="n">sep</span><span class="p">,</span>
<span class="n">quote</span><span class="o">=</span><span class="n">quote</span><span class="p">,</span>
<span class="n">escape</span><span class="o">=</span><span class="n">escape</span><span class="p">,</span>
<span class="n">header</span><span class="o">=</span><span class="n">header</span><span class="p">,</span>
<span class="n">nullValue</span><span class="o">=</span><span class="n">nullValue</span><span class="p">,</span>
<span class="n">escapeQuotes</span><span class="o">=</span><span class="n">escapeQuotes</span><span class="p">,</span>
<span class="n">quoteAll</span><span class="o">=</span><span class="n">quoteAll</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">ignoreLeadingWhiteSpace</span><span class="o">=</span><span class="n">ignoreLeadingWhiteSpace</span><span class="p">,</span>
<span class="n">ignoreTrailingWhiteSpace</span><span class="o">=</span><span class="n">ignoreTrailingWhiteSpace</span><span class="p">,</span>
<span class="n">charToEscapeQuoteEscaping</span><span class="o">=</span><span class="n">charToEscapeQuoteEscaping</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">emptyValue</span><span class="o">=</span><span class="n">emptyValue</span><span class="p">,</span>
<span class="n">lineSep</span><span class="o">=</span><span class="n">lineSep</span><span class="p">,</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">xml</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">rowTag</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">attributePrefix</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">valueTag</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">rootTag</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">declaration</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">arrayElementName</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nullValue</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">validateName</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` in XML format at the specified path.</span>
<span class="sd"> .. versionadded:: 4.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a XML file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;xml&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a XML file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.mode(&quot;overwrite&quot;).option(&quot;rowTag&quot;, &quot;person&quot;).xml(d)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the XML file as a DataFrame.</span>
<span class="sd"> ... spark.read.option(&quot;rowTag&quot;, &quot;person&quot;).format(&quot;xml&quot;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span>
<span class="n">rowTag</span><span class="o">=</span><span class="n">rowTag</span><span class="p">,</span>
<span class="n">attributePrefix</span><span class="o">=</span><span class="n">attributePrefix</span><span class="p">,</span>
<span class="n">valueTag</span><span class="o">=</span><span class="n">valueTag</span><span class="p">,</span>
<span class="n">rootTag</span><span class="o">=</span><span class="n">rootTag</span><span class="p">,</span>
<span class="n">declaration</span><span class="o">=</span><span class="n">declaration</span><span class="p">,</span>
<span class="n">arrayElementName</span><span class="o">=</span><span class="n">arrayElementName</span><span class="p">,</span>
<span class="n">nullValue</span><span class="o">=</span><span class="n">nullValue</span><span class="p">,</span>
<span class="n">dateFormat</span><span class="o">=</span><span class="n">dateFormat</span><span class="p">,</span>
<span class="n">timestampFormat</span><span class="o">=</span><span class="n">timestampFormat</span><span class="p">,</span>
<span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">,</span>
<span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">,</span>
<span class="n">validateName</span><span class="o">=</span><span class="n">validateName</span><span class="p">,</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">xml</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<div class="viewcode-block" id="DataFrameWriter.orc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.orc.html#pyspark.sql.DataFrameWriter.orc">[docs]</a> <span class="k">def</span> <span class="nf">orc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">partitionBy</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">compression</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` in ORC format at the specified path.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> the path in any Hadoop supported file system</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> partitionBy : str or list, optional</span>
<span class="sd"> names of partitioning columns</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-orc.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Write a DataFrame into a ORC file and read it back.</span>
<span class="sd"> &gt;&gt;&gt; import tempfile</span>
<span class="sd"> &gt;&gt;&gt; with tempfile.TemporaryDirectory(prefix=&quot;orc&quot;) as d:</span>
<span class="sd"> ... # Write a DataFrame into a ORC file</span>
<span class="sd"> ... spark.createDataFrame(</span>
<span class="sd"> ... [{&quot;age&quot;: 100, &quot;name&quot;: &quot;Hyukjin Kwon&quot;}]</span>
<span class="sd"> ... ).write.orc(d, mode=&quot;overwrite&quot;)</span>
<span class="sd"> ...</span>
<span class="sd"> ... # Read the Parquet file as a DataFrame.</span>
<span class="sd"> ... spark.read.format(&quot;orc&quot;).load(d).show()</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |age| name|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> |100|Hyukjin Kwon|</span>
<span class="sd"> +---+------------+</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span>
<span class="k">if</span> <span class="n">partitionBy</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partitionBy</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span><span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">orc</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriter.jdbc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriter.jdbc.html#pyspark.sql.DataFrameWriter.jdbc">[docs]</a> <span class="k">def</span> <span class="nf">jdbc</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">properties</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Saves the content of the :class:`DataFrame` to an external database table via JDBC.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> table : str</span>
<span class="sd"> Name of the table in the external database.</span>
<span class="sd"> mode : str, optional</span>
<span class="sd"> specifies the behavior of the save operation when data already exists.</span>
<span class="sd"> * ``append``: Append contents of this :class:`DataFrame` to existing data.</span>
<span class="sd"> * ``overwrite``: Overwrite existing data.</span>
<span class="sd"> * ``ignore``: Silently ignore this operation if data already exists.</span>
<span class="sd"> * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \</span>
<span class="sd"> exists.</span>
<span class="sd"> properties : dict</span>
<span class="sd"> a dictionary of JDBC database connection arguments. Normally at</span>
<span class="sd"> least properties &quot;user&quot; and &quot;password&quot; with their corresponding values.</span>
<span class="sd"> For example { &#39;user&#39; : &#39;SYSTEM&#39;, &#39;password&#39; : &#39;mypassword&#39; }</span>
<span class="sd"> Other Parameters</span>
<span class="sd"> ----------------</span>
<span class="sd"> Extra options</span>
<span class="sd"> For the extra options, refer to</span>
<span class="sd"> `Data Source Option &lt;https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option&gt;`_</span>
<span class="sd"> for the version you use.</span>
<span class="sd"> .. # noqa</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Don&#39;t create too many partitions in parallel on a large cluster;</span>
<span class="sd"> otherwise Spark might crash your external database systems.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaClass</span>
<span class="k">if</span> <span class="n">properties</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">properties</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">jprop</span> <span class="o">=</span> <span class="n">JavaClass</span><span class="p">(</span>
<span class="s2">&quot;java.util.Properties&quot;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="o">.</span><span class="n">_gateway</span><span class="o">.</span><span class="n">_gateway_client</span><span class="p">,</span>
<span class="p">)()</span>
<span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">properties</span><span class="p">:</span>
<span class="n">jprop</span><span class="o">.</span><span class="n">setProperty</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">properties</span><span class="p">[</span><span class="n">k</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">jdbc</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">jprop</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="DataFrameWriterV2"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.html#pyspark.sql.DataFrameWriterV2">[docs]</a><span class="k">class</span> <span class="nc">DataFrameWriterV2</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interface used to write a class:`pyspark.sql.dataframe.DataFrame`</span>
<span class="sd"> to external storage using the v2 API.</span>
<span class="sd"> .. versionadded:: 3.1.0</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Supports Spark Connect.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">df</span><span class="p">:</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">,</span> <span class="n">table</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_df</span> <span class="o">=</span> <span class="n">df</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_spark</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">sparkSession</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="o">.</span><span class="n">writeTo</span><span class="p">(</span><span class="n">table</span><span class="p">)</span>
<div class="viewcode-block" id="DataFrameWriterV2.using"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.using.html#pyspark.sql.DataFrameWriterV2.using">[docs]</a> <span class="k">def</span> <span class="nf">using</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">provider</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specifies a provider for the underlying output data source.</span>
<span class="sd"> Spark&#39;s default catalog supports &quot;parquet&quot;, &quot;json&quot;, etc.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">using</span><span class="p">(</span><span class="n">provider</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.option"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.option.html#pyspark.sql.DataFrameWriterV2.option">[docs]</a> <span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Add a write option.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">to_str</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.options"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.options.html#pyspark.sql.DataFrameWriterV2.options">[docs]</a> <span class="k">def</span> <span class="nf">options</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="s2">&quot;OptionalPrimitiveType&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Add write options.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">options</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">to_str</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">options</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="n">options</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.tableProperty"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.tableProperty.html#pyspark.sql.DataFrameWriterV2.tableProperty">[docs]</a> <span class="k">def</span> <span class="nf">tableProperty</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">property</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Add table property.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">tableProperty</span><span class="p">(</span><span class="nb">property</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.partitionedBy"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.partitionedBy.html#pyspark.sql.DataFrameWriterV2.partitionedBy">[docs]</a> <span class="k">def</span> <span class="nf">partitionedBy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="n">Column</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DataFrameWriterV2&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Partition the output table created by `create`, `createOrReplace`, or `replace` using</span>
<span class="sd"> the given columns or transforms.</span>
<span class="sd"> When specified, the table data will be stored by these values for efficient reads.</span>
<span class="sd"> For example, when a table is partitioned by day, it may be stored</span>
<span class="sd"> in a directory layout like:</span>
<span class="sd"> * `table/day=2019-06-01/`</span>
<span class="sd"> * `table/day=2019-06-02/`</span>
<span class="sd"> Partitioning is one of the most widely used techniques to optimize physical data layout.</span>
<span class="sd"> It provides a coarse-grained index for skipping unnecessary data reads when queries have</span>
<span class="sd"> predicates on the partitioned columns. In order for partitioning to work well, the number</span>
<span class="sd"> of distinct values in each column should typically be less than tens of thousands.</span>
<span class="sd"> `col` and `cols` support only the following functions:</span>
<span class="sd"> * :py:func:`pyspark.sql.functions.years`</span>
<span class="sd"> * :py:func:`pyspark.sql.functions.months`</span>
<span class="sd"> * :py:func:`pyspark.sql.functions.days`</span>
<span class="sd"> * :py:func:`pyspark.sql.functions.hours`</span>
<span class="sd"> * :py:func:`pyspark.sql.functions.bucket`</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_seq</span><span class="p">,</span> <span class="n">_to_java_column</span>
<span class="n">col</span> <span class="o">=</span> <span class="n">_to_java_column</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="n">cols</span> <span class="o">=</span> <span class="n">_to_seq</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_spark</span><span class="o">.</span><span class="n">_sc</span><span class="p">,</span> <span class="p">[</span><span class="n">_to_java_column</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">partitionedBy</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.create"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.create.html#pyspark.sql.DataFrameWriterV2.create">[docs]</a> <span class="k">def</span> <span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Create a new table from the contents of the data frame.</span>
<span class="sd"> The new table&#39;s schema, partition layout, properties, and other configuration will be</span>
<span class="sd"> based on the configuration set on this writer.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">create</span><span class="p">()</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.replace"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.replace.html#pyspark.sql.DataFrameWriterV2.replace">[docs]</a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace an existing table with the contents of the data frame.</span>
<span class="sd"> The existing table&#39;s schema, partition layout, properties, and other configuration will be</span>
<span class="sd"> replaced with the contents of the data frame and the configuration set on this writer.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">replace</span><span class="p">()</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.createOrReplace"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.createOrReplace.html#pyspark.sql.DataFrameWriterV2.createOrReplace">[docs]</a> <span class="k">def</span> <span class="nf">createOrReplace</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Create a new table or replace an existing table with the contents of the data frame.</span>
<span class="sd"> The output table&#39;s schema, partition layout, properties,</span>
<span class="sd"> and other configuration will be based on the contents of the data frame</span>
<span class="sd"> and the configuration set on this writer.</span>
<span class="sd"> If the table exists, its configuration and data will be replaced.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">createOrReplace</span><span class="p">()</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.append"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.append.html#pyspark.sql.DataFrameWriterV2.append">[docs]</a> <span class="k">def</span> <span class="nf">append</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Append the contents of the data frame to the output table.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">append</span><span class="p">()</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.overwrite"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.overwrite.html#pyspark.sql.DataFrameWriterV2.overwrite">[docs]</a> <span class="k">def</span> <span class="nf">overwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">condition</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Overwrite rows matching the given filter condition with the contents of the data frame in</span>
<span class="sd"> the output table.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.classic.column</span> <span class="kn">import</span> <span class="n">_to_java_column</span>
<span class="n">condition</span> <span class="o">=</span> <span class="n">_to_java_column</span><span class="p">(</span><span class="n">condition</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">overwrite</span><span class="p">(</span><span class="n">condition</span><span class="p">)</span></div>
<div class="viewcode-block" id="DataFrameWriterV2.overwritePartitions"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.DataFrameWriterV2.overwritePartitions.html#pyspark.sql.DataFrameWriterV2.overwritePartitions">[docs]</a> <span class="k">def</span> <span class="nf">overwritePartitions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Overwrite all partition for which the data frame contains at least one row with the contents</span>
<span class="sd"> of the data frame in the output table.</span>
<span class="sd"> This operation is equivalent to Hive&#39;s `INSERT OVERWRITE ... PARTITION`, which replaces</span>
<span class="sd"> partitions dynamically depending on the contents of the data frame.</span>
<span class="sd"> .. versionadded: 3.1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwriter</span><span class="o">.</span><span class="n">overwritePartitions</span><span class="p">()</span></div></div>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">py4j</span>
<span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.sql.readwriter</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;SPARK_HOME&quot;</span><span class="p">])</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">readwriter</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">,</span> <span class="s2">&quot;PythonTest&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">_getActiveSessionOrCreate</span><span class="p">()</span>
<span class="k">except</span> <span class="n">py4j</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">Py4JError</span><span class="p">:</span>
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;spark&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span>
<span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">readwriter</span><span class="p">,</span>
<span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span>
<span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">REPORT_NDIFF</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</article>
<footer class="bd-footer-article">
<div class="footer-article-items footer-article__inner">
<div class="footer-article-item"><!-- Previous / next buttons -->
<div class="prev-next-area">
</div></div>
</div>
</footer>
</div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item"><p class="copyright">
Copyright @ 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p></div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item"><p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3.
</p></div>
</div>
</div>
</footer>
</body>
</html>