blob: ba27e0864acc0b21c932124afaf19183799db600 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Testing PySpark &#8212; PySpark 3.5.5 documentation</title>
<link href="../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" href="../_static/styles/pydata-sphinx-theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/development/testing.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Debugging PySpark" href="debugging.html" />
<link rel="prev" title="Contributing to PySpark" href="contributing.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl">
<div id="navbar-start">
<a class="navbar-brand" href="../index.html">
<img src="../_static/spark-logo-reverse.png" class="logo" alt="logo">
</a>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-collapsible" aria-controls="navbar-collapsible" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse">
<div id="navbar-center" class="mr-auto">
<div class="navbar-center-item">
<ul id="navbar-main-elements" class="navbar-nav">
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../index.html">
Overview
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../getting_started/index.html">
Getting Started
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../user_guide/index.html">
User Guides
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../reference/index.html">
API Reference
</a>
</li>
<li class="toctree-l1 current active nav-item">
<a class="reference internal nav-link" href="index.html">
Development
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</div>
</div>
<div id="navbar-end">
<div class="navbar-end-item">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
3.5.5
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs homepage URL for one version entry.
//
// entry: object whose `version` property is substituted for the
//        "{version}" placeholder of the URL template.
// Returns the template string with the placeholder filled in.
function buildURL(entry) {
  const urlTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja
  return urlTemplate.replace("{version}", entry.version);
}
// Click handler for a version-switcher link: check whether the page currently
// being viewed also exists in the other docs version and, if so, go there
// instead of the homepage of the other docs version.
//
// event: the click event; the link's "href" attribute holds the homepage URL
//        of the other docs version (it ends in ".../index.html").
// Returns false so the browser does not follow the link itself; navigation
// happens from the AJAX callbacks below.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "development/testing.html";
  // Prefer currentTarget (the element the handler is bound to); fall back to
  // target for callers that pass a minimal event object.
  const link = event.currentTarget || event.target;
  const otherDocsHomepage = link.getAttribute("href");

  let tryUrl;
  try {
    // Resolve the page path relative to the homepage URL. Plain string
    // concatenation would produce ".../index.htmldevelopment/testing.html",
    // which never exists, so the lookup would always fall back to the homepage.
    tryUrl = new URL(currentFilePath, otherDocsHomepage).href;
  } catch (err) {
    // The href is not an absolute URL, so there is nothing to probe;
    // just go to whatever the link points at.
    location.href = otherDocsHomepage;
    return false;
  }

  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // otherwise fall back to the homepage of the other docs version
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown.
// Runs immediately: fetches the versions JSON and appends one link per entry
// to the #version_switcher element.
(function () {
  const linkClasses = "list-group-item list-group-item-action py-1";

  $.getJSON("https://spark.apache.org/static/versions.json", function (data, textStatus, jqXHR) {
    const dropdown = $("#version_switcher");

    // Links are created in the order of the JSON entries and initially point
    // at each version's docs homepage; the click handler decides the final
    // destination later.
    function appendVersionLink(index, entry) {
      // entries without a custom name (e.g., "latest") are labelled with
      // their version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      entry.url = buildURL(entry);

      const link = document.createElement("a");
      link.setAttribute("class", linkClasses);
      link.setAttribute("href", String(entry.url));
      link.textContent = String(entry.name);
      link.onclick = checkPageExistsAndRedirect;
      dropdown.append(link);
    }

    $.each(data, appendVersionLink);
  });
})();
</script>
</div>
</div>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items"><form class="bd-search d-flex align-items-center" action="../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="current nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="contributing.html">
Contributing to PySpark
</a>
</li>
<li class="toctree-l1 current active">
<a class="current reference internal" href="#">
Testing PySpark
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="debugging.html">
Debugging PySpark
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="setting_ide.html">
Setting up IDEs
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="errors.html">
Error classes in PySpark
</a>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#running-individual-pyspark-tests">
Running Individual PySpark Tests
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#running-tests-using-github-actions">
Running Tests using GitHub Actions
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#running-tests-for-spark-connect">
Running Tests for Spark Connect
</a>
<ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#running-tests-for-python-client">
Running Tests for Python Client
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#running-pyspark-shell-with-python-client">
Running PySpark Shell with Python Client
</a>
</li>
</ul>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="testing-pyspark">
<h1>Testing PySpark<a class="headerlink" href="#testing-pyspark" title="Permalink to this headline"></a></h1>
<p>In order to run PySpark tests, you should build Spark itself first via Maven or SBT. For example,</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>build/mvn<span class="w"> </span>-DskipTests<span class="w"> </span>clean<span class="w"> </span>package
</pre></div>
</div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>build/sbt<span class="w"> </span>-Phive<span class="w"> </span>clean<span class="w"> </span>package
</pre></div>
</div>
<p>After that, the PySpark test cases can be run using <code class="docutils literal notranslate"><span class="pre">python/run-tests</span></code>. For example,</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python/run-tests<span class="w"> </span>--python-executable<span class="o">=</span>python3
</pre></div>
</div>
<p>Note that you may need to set the <code class="docutils literal notranslate"><span class="pre">OBJC_DISABLE_INITIALIZE_FORK_SAFETY</span></code> environment variable to <code class="docutils literal notranslate"><span class="pre">YES</span></code> if you are running tests on macOS.</p>
<p>Please see the guidance on how to <a class="reference external" href="https://github.com/apache/spark#building-spark">build Spark</a>,
<a class="reference external" href="https://spark.apache.org/developer-tools.html">run tests for a module, or individual tests</a>.</p>
<div class="section" id="running-individual-pyspark-tests">
<h2>Running Individual PySpark Tests<a class="headerlink" href="#running-individual-pyspark-tests" title="Permalink to this headline"></a></h2>
<p>You can run a specific test using <code class="docutils literal notranslate"><span class="pre">python/run-tests</span></code>, for example, as below:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python/run-tests<span class="w"> </span>--testnames<span class="w"> </span>pyspark.sql.tests.test_arrow
</pre></div>
</div>
<p>Please refer to <a class="reference external" href="https://spark.apache.org/developer-tools.html">Testing PySpark</a> for more details.</p>
</div>
<div class="section" id="running-tests-using-github-actions">
<h2>Running Tests using GitHub Actions<a class="headerlink" href="#running-tests-using-github-actions" title="Permalink to this headline"></a></h2>
<p>You can run the full PySpark tests by using GitHub Actions in your own forked GitHub
repository with a few clicks. Please refer to
<a class="reference external" href="https://spark.apache.org/developer-tools.html">Running tests in your forked repository using GitHub Actions</a> for more details.</p>
</div>
<div class="section" id="running-tests-for-spark-connect">
<h2>Running Tests for Spark Connect<a class="headerlink" href="#running-tests-for-spark-connect" title="Permalink to this headline"></a></h2>
<div class="section" id="running-tests-for-python-client">
<h3>Running Tests for Python Client<a class="headerlink" href="#running-tests-for-python-client" title="Permalink to this headline"></a></h3>
<p>In order to test changes to the Protobuf definitions, for example, at
<a class="reference external" href="https://github.com/apache/spark/tree/branch-3.5/connector/connect/common/src/main/protobuf/spark/connect">spark/connector/connect/common/src/main/protobuf/spark/connect</a>,
you should first regenerate the Python Protobuf client by running <code class="docutils literal notranslate"><span class="pre">dev/connect-gen-protos.sh</span></code>.</p>
</div>
<div class="section" id="running-pyspark-shell-with-python-client">
<h3>Running PySpark Shell with Python Client<a class="headerlink" href="#running-pyspark-shell-with-python-client" title="Permalink to this headline"></a></h3>
<p>For a locally built Apache Spark:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>bin/pyspark<span class="w"> </span>--remote<span class="w"> </span><span class="s2">&quot;local[*]&quot;</span>
</pre></div>
</div>
<p>For the Apache Spark release:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>bin/pyspark<span class="w"> </span>--remote<span class="w"> </span><span class="s2">&quot;local[*]&quot;</span><span class="w"> </span>--packages<span class="w"> </span>org.apache.spark:spark-connect_2.12:3.5.5
</pre></div>
</div>
</div>
</div>
</div>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
<a class='left-prev' id="prev-link" href="contributing.html" title="previous page">
<i class="fas fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Contributing to PySpark</p>
</div>
</a>
<a class='right-next' id="next-link" href="debugging.html" title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Debugging PySpark</p>
</div>
<i class="fas fa-angle-right"></i>
</a>
</div>
</main>
</div>
</div>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright .<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>