<!DOCTYPE html>
<html class="no-js">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Running Spark on Kubernetes - Spark 4.1.0-preview1 Documentation</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="css/custom.css" rel="stylesheet">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<link rel="stylesheet" href="css/docsearch.min.css" />
<link rel="stylesheet" href="css/docsearch.css">
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar">
<div class="navbar-brand"><a href="index.html">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" width="141" height="72"/></a><span class="version">4.1.0-preview1</span>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse"
data-target="#navbarCollapse" aria-controls="navbarCollapse"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto">
<li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a>
<div class="dropdown-menu" aria-labelledby="navbarQuickStart">
<a class="dropdown-item" href="quick-start.html">Quick Start</a>
<a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a>
<a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a>
<a class="dropdown-item" href="streaming/index.html">Structured Streaming</a>
<a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a>
<a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a>
<a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a>
<a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a>
<a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a>
<a class="dropdown-item" href="declarative-pipelines-programming-guide.html">Declarative Pipelines</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a>
<div class="dropdown-menu" aria-labelledby="navbarAPIDocs">
<a class="dropdown-item" href="api/python/index.html">Python</a>
<a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a>
<a class="dropdown-item" href="api/java/index.html">Java</a>
<a class="dropdown-item" href="api/R/index.html">R</a>
<a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a>
<div class="dropdown-menu" aria-labelledby="navbarDeploying">
<a class="dropdown-item" href="cluster-overview.html">Overview</a>
<a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a>
<a class="dropdown-item" href="running-on-yarn.html">YARN</a>
<a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
<div class="dropdown-menu" aria-labelledby="navbarMore">
<a class="dropdown-item" href="configuration.html">Configuration</a>
<a class="dropdown-item" href="monitoring.html">Monitoring</a>
<a class="dropdown-item" href="tuning.html">Tuning Guide</a>
<a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a>
<a class="dropdown-item" href="security.html">Security</a>
<a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a>
<a class="dropdown-item" href="migration-guide.html">Migration Guide</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="building-spark.html">Building Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a>
</div>
</li>
<li class="nav-item">
<input type="text" id="docsearch-input" placeholder="Search the docs…">
</li>
</ul>
<!--<span class="navbar-text navbar-right"><span class="version-text">v4.1.0-preview1</span></span>-->
</div>
</nav>
<div class="container">
<div class="content mr-3" id="content">
<h1 class="title">Running Spark on Kubernetes</h1>
<ul id="markdown-toc">
<li><a href="#security" id="markdown-toc-security">Security</a> <ul>
<li><a href="#user-identity" id="markdown-toc-user-identity">User Identity</a></li>
<li><a href="#volume-mounts" id="markdown-toc-volume-mounts">Volume Mounts</a></li>
</ul>
</li>
<li><a href="#prerequisites" id="markdown-toc-prerequisites">Prerequisites</a></li>
<li><a href="#how-it-works" id="markdown-toc-how-it-works">How it works</a></li>
<li><a href="#submitting-applications-to-kubernetes" id="markdown-toc-submitting-applications-to-kubernetes">Submitting Applications to Kubernetes</a> <ul>
<li><a href="#docker-images" id="markdown-toc-docker-images">Docker Images</a></li>
<li><a href="#cluster-mode" id="markdown-toc-cluster-mode">Cluster Mode</a></li>
<li><a href="#client-mode" id="markdown-toc-client-mode">Client Mode</a> <ul>
<li><a href="#client-mode-networking" id="markdown-toc-client-mode-networking">Client Mode Networking</a></li>
<li><a href="#client-mode-executor-pod-garbage-collection" id="markdown-toc-client-mode-executor-pod-garbage-collection">Client Mode Executor Pod Garbage Collection</a></li>
<li><a href="#authentication-parameters" id="markdown-toc-authentication-parameters">Authentication Parameters</a></li>
</ul>
</li>
<li><a href="#ipv4-and-ipv6" id="markdown-toc-ipv4-and-ipv6">IPv4 and IPv6</a></li>
<li><a href="#dependency-management" id="markdown-toc-dependency-management">Dependency Management</a></li>
<li><a href="#secret-management" id="markdown-toc-secret-management">Secret Management</a></li>
<li><a href="#pod-template" id="markdown-toc-pod-template">Pod Template</a></li>
<li><a href="#using-kubernetes-volumes" id="markdown-toc-using-kubernetes-volumes">Using Kubernetes Volumes</a> <ul>
<li><a href="#pvc-oriented-executor-pod-allocation" id="markdown-toc-pvc-oriented-executor-pod-allocation">PVC-oriented executor pod allocation</a></li>
</ul>
</li>
<li><a href="#local-storage" id="markdown-toc-local-storage">Local Storage</a> <ul>
<li><a href="#using-ram-for-local-storage" id="markdown-toc-using-ram-for-local-storage">Using RAM for local storage</a></li>
</ul>
</li>
<li><a href="#introspection-and-debugging" id="markdown-toc-introspection-and-debugging">Introspection and Debugging</a> <ul>
<li><a href="#accessing-logs" id="markdown-toc-accessing-logs">Accessing Logs</a></li>
<li><a href="#accessing-driver-ui" id="markdown-toc-accessing-driver-ui">Accessing Driver UI</a></li>
<li><a href="#debugging" id="markdown-toc-debugging">Debugging</a></li>
</ul>
</li>
<li><a href="#kubernetes-features" id="markdown-toc-kubernetes-features">Kubernetes Features</a> <ul>
<li><a href="#configuration-file" id="markdown-toc-configuration-file">Configuration File</a></li>
<li><a href="#contexts" id="markdown-toc-contexts">Contexts</a></li>
<li><a href="#namespaces" id="markdown-toc-namespaces">Namespaces</a></li>
<li><a href="#rbac" id="markdown-toc-rbac">RBAC</a></li>
</ul>
</li>
<li><a href="#spark-application-management" id="markdown-toc-spark-application-management">Spark Application Management</a></li>
<li><a href="#future-work" id="markdown-toc-future-work">Future Work</a></li>
</ul>
</li>
<li><a href="#configuration" id="markdown-toc-configuration">Configuration</a> <ul>
<li><a href="#spark-properties" id="markdown-toc-spark-properties">Spark Properties</a></li>
<li><a href="#pod-template-properties" id="markdown-toc-pod-template-properties">Pod template properties</a></li>
<li><a href="#pod-metadata" id="markdown-toc-pod-metadata">Pod Metadata</a></li>
<li><a href="#pod-spec" id="markdown-toc-pod-spec">Pod Spec</a></li>
<li><a href="#container-spec" id="markdown-toc-container-spec">Container spec</a></li>
<li><a href="#resource-allocation-and-configuration-overview" id="markdown-toc-resource-allocation-and-configuration-overview">Resource Allocation and Configuration Overview</a></li>
<li><a href="#resource-level-scheduling-overview" id="markdown-toc-resource-level-scheduling-overview">Resource Level Scheduling Overview</a> <ul>
<li><a href="#priority-scheduling" id="markdown-toc-priority-scheduling">Priority Scheduling</a></li>
<li><a href="#customized-kubernetes-schedulers-for-spark-on-kubernetes" id="markdown-toc-customized-kubernetes-schedulers-for-spark-on-kubernetes">Customized Kubernetes Schedulers for Spark on Kubernetes</a></li>
<li><a href="#using-volcano-as-customized-scheduler-for-spark-on-kubernetes" id="markdown-toc-using-volcano-as-customized-scheduler-for-spark-on-kubernetes">Using Volcano as Customized Scheduler for Spark on Kubernetes</a> <ul>
<li><a href="#prerequisites-1" id="markdown-toc-prerequisites-1">Prerequisites</a></li>
<li><a href="#build" id="markdown-toc-build">Build</a></li>
<li><a href="#usage" id="markdown-toc-usage">Usage</a></li>
<li><a href="#volcano-feature-step" id="markdown-toc-volcano-feature-step">Volcano Feature Step</a></li>
<li><a href="#volcano-podgroup-template" id="markdown-toc-volcano-podgroup-template">Volcano PodGroup Template</a></li>
</ul>
</li>
<li><a href="#using-apache-yunikorn-as-customized-scheduler-for-spark-on-kubernetes" id="markdown-toc-using-apache-yunikorn-as-customized-scheduler-for-spark-on-kubernetes">Using Apache YuniKorn as Customized Scheduler for Spark on Kubernetes</a> <ul>
<li><a href="#prerequisites-2" id="markdown-toc-prerequisites-2">Prerequisites</a></li>
<li><a href="#get-started" id="markdown-toc-get-started">Get started</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#stage-level-scheduling-overview" id="markdown-toc-stage-level-scheduling-overview">Stage Level Scheduling Overview</a></li>
</ul>
</li>
</ul>
<p>Spark can run on clusters managed by <a href="https://kubernetes.io">Kubernetes</a>. This feature makes use of the native
Kubernetes scheduler that has been added to Spark.</p>
<h1 id="security">Security</h1>
<p>Security features like authentication are not enabled by default. When deploying a cluster that is open to the internet
or an untrusted network, it&#8217;s important to secure access to the cluster to prevent unauthorized applications
from running on the cluster.
Please see <a href="security.html">Spark Security</a> and the specific security sections in this doc before running Spark.</p>
<h2 id="user-identity">User Identity</h2>
<p>Images built from the project-provided Dockerfiles contain a default <a href="https://docs.docker.com/engine/reference/builder/#user"><code class="language-plaintext highlighter-rouge">USER</code></a> directive with a default UID of <code class="language-plaintext highlighter-rouge">185</code>. This means that the resulting images will run the Spark processes as this UID inside the container. Security-conscious deployments should consider providing custom images with <code class="language-plaintext highlighter-rouge">USER</code> directives specifying their desired unprivileged UID and GID. The resulting UID should include the root group in its supplementary groups in order to be able to run the Spark executables. Users building their own images with the provided <code class="language-plaintext highlighter-rouge">docker-image-tool.sh</code> script can use the <code class="language-plaintext highlighter-rouge">-u &lt;uid&gt;</code> option to specify the desired UID.</p>
<p>Alternatively the <a href="#pod-template">Pod Template</a> feature can be used to add a <a href="https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#volumes-and-file-systems">Security Context</a> with a <code class="language-plaintext highlighter-rouge">runAsUser</code> to the pods that Spark submits. This can be used to override the <code class="language-plaintext highlighter-rouge">USER</code> directives in the images themselves. Please bear in mind that this requires cooperation from your users and as such may not be a suitable solution for shared environments. Cluster administrators should use <a href="https://kubernetes.io/docs/concepts/policy/pod-security-policy/#users-and-groups">Pod Security Policies</a> if they wish to limit the users that pods may run as.</p>
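<p>For illustration only, a minimal pod template along the following lines (the UID value is an assumption, not a recommendation) could be supplied via the <a href="#pod-template">Pod Template</a> feature described below to set such a security context:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code># illustrative template file passed via spark.kubernetes.driver.podTemplateFile
# and/or spark.kubernetes.executor.podTemplateFile
apiVersion: v1
kind: Pod
spec:
  securityContext:
    runAsUser: 1000   # hypothetical unprivileged UID chosen by the cluster administrator
</code></pre></div></div>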
<h2 id="volume-mounts">Volume Mounts</h2>
<p>As described later in this document under <a href="#using-kubernetes-volumes">Using Kubernetes Volumes</a>, Spark on K8S provides configuration options that allow for mounting certain volume types into the driver and executor pods. In particular, it allows for <a href="https://kubernetes.io/docs/concepts/storage/volumes/#hostpath"><code class="language-plaintext highlighter-rouge">hostPath</code></a> volumes which, as described in the Kubernetes documentation, have known security vulnerabilities.</p>
<p>Cluster administrators should use <a href="https://kubernetes.io/docs/concepts/policy/pod-security-policy/">Pod Security Policies</a> to limit the ability to mount <code class="language-plaintext highlighter-rouge">hostPath</code> volumes appropriately for their environments.</p>
<h1 id="prerequisites">Prerequisites</h1>
<ul>
<li>A running Kubernetes cluster at version &gt;= 1.31 with access configured to it using
<a href="https://kubernetes.io/docs/reference/kubectl/">kubectl</a>. If you do not already have a working Kubernetes cluster,
you may set up a test cluster on your local machine using
<a href="https://kubernetes.io/docs/getting-started-guides/minikube/">minikube</a>.
<ul>
<li>We recommend using the latest release of minikube with the DNS addon enabled.</li>
<li>Be aware that the default minikube configuration is not enough for running Spark applications.
We recommend 3 CPUs and 4g of memory to be able to start a simple Spark application with a single
executor.</li>
<li>Check <a href="https://github.com/fabric8io/kubernetes-client">kubernetes-client library</a>&#8217;s version of your Spark environment, and its compatibility with your Kubernetes cluster&#8217;s version.</li>
</ul>
</li>
<li>You must have appropriate permissions to list, create, edit and delete
<a href="https://kubernetes.io/docs/concepts/workloads/pods/">pods</a> in your cluster. You can verify that you can list these resources
by running <code class="language-plaintext highlighter-rouge">kubectl auth can-i &lt;list|create|edit|delete&gt; pods</code>.
<ul>
<li>The service account credentials used by the driver pods must be allowed to create pods, services and configmaps.</li>
</ul>
</li>
<li>You must have <a href="https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/">Kubernetes DNS</a> configured in your cluster.</li>
</ul>
<h1 id="how-it-works">How it works</h1>
<p style="text-align: center;">
<img src="img/k8s-cluster-mode.png" title="Spark cluster components" alt="Spark cluster components" />
</p>
<p><code>spark-submit</code> can be directly used to submit a Spark application to a Kubernetes cluster.
The submission mechanism works as follows:</p>
<ul>
<li>Spark creates a Spark driver running within a <a href="https://kubernetes.io/docs/concepts/workloads/pods/pod/">Kubernetes pod</a>.</li>
<li>The driver creates executors, which also run within Kubernetes pods, connects to them, and executes application code.</li>
<li>When the application completes, the executor pods terminate and are cleaned up, but the driver pod persists
logs and remains in &#8220;completed&#8221; state in the Kubernetes API until it&#8217;s eventually garbage collected or manually cleaned up.</li>
</ul>
<p>Note that in the completed state, the driver pod does <em>not</em> use any computational or memory resources.</p>
<p>The driver and executor pod scheduling is handled by Kubernetes. Communication to the Kubernetes API is done via fabric8. It is possible to schedule the
driver and executor pods on a subset of available nodes through a <a href="https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector">node selector</a>
using the configuration property for it. It will be possible to use more advanced
scheduling hints like <a href="https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity">node/pod affinities</a> in a future release.</p>
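<p>For example, node selectors can be specified with the <code class="language-plaintext highlighter-rouge">spark.kubernetes.node.selector.[labelKey]</code> configuration property; the label key and value below are illustrative:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.node.selector.disktype=ssd
</code></pre></div></div>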
<h1 id="submitting-applications-to-kubernetes">Submitting Applications to Kubernetes</h1>
<h2 id="docker-images">Docker Images</h2>
<p>Kubernetes requires users to supply images that can be deployed into containers within pods. The images are built to
be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is
frequently used with Kubernetes. Spark (starting with version 2.3) ships with a Dockerfile that can be used for this
purpose, or customized to match an individual application&#8217;s needs. It can be found in the <code class="language-plaintext highlighter-rouge">kubernetes/dockerfiles/</code>
directory.</p>
<p>Spark also ships with a <code class="language-plaintext highlighter-rouge">bin/docker-image-tool.sh</code> script that can be used to build and publish the Docker images to
use with the Kubernetes backend.</p>
<p>Example usage is:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>./bin/docker-image-tool.sh <span class="nt">-r</span> &lt;repo&gt; <span class="nt">-t</span> my-tag build
<span class="nv">$ </span>./bin/docker-image-tool.sh <span class="nt">-r</span> &lt;repo&gt; <span class="nt">-t</span> my-tag push
</code></pre></div></div>
<p>This will build using the project&#8217;s provided default <code class="language-plaintext highlighter-rouge">Dockerfiles</code>. To see more options available for customising the behaviour of this tool, including providing custom <code class="language-plaintext highlighter-rouge">Dockerfiles</code>, please run with the <code class="language-plaintext highlighter-rouge">-h</code> flag.</p>
<p>By default, <code class="language-plaintext highlighter-rouge">bin/docker-image-tool.sh</code> builds the Docker image for running JVM jobs. You need to opt in to build additional
language binding Docker images.</p>
<p>Example usage is:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c"># To build additional PySpark docker image</span>
<span class="nv">$ </span>./bin/docker-image-tool.sh <span class="nt">-r</span> &lt;repo&gt; <span class="nt">-t</span> my-tag <span class="nt">-p</span> ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
<span class="c"># To build additional SparkR docker image</span>
<span class="nv">$ </span>./bin/docker-image-tool.sh <span class="nt">-r</span> &lt;repo&gt; <span class="nt">-t</span> my-tag <span class="nt">-R</span> ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
</code></pre></div></div>
<p>You can also use the <a href="https://hub.docker.com/r/apache/spark">Apache Spark Docker images</a> (such as <code class="language-plaintext highlighter-rouge">apache/spark:&lt;version&gt;</code>) directly.</p>
<h2 id="cluster-mode">Cluster Mode</h2>
<p>To launch Spark Pi in cluster mode,</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>./bin/spark-submit <span class="se">\</span>
<span class="nt">--master</span> k8s://https://&lt;k8s-apiserver-host&gt;:&lt;k8s-apiserver-port&gt; <span class="se">\</span>
<span class="nt">--deploy-mode</span> cluster <span class="se">\</span>
<span class="nt">--name</span> spark-pi <span class="se">\</span>
<span class="nt">--class</span> org.apache.spark.examples.SparkPi <span class="se">\</span>
<span class="nt">--conf</span> spark.executor.instances<span class="o">=</span>5 <span class="se">\</span>
<span class="nt">--conf</span> spark.kubernetes.container.image<span class="o">=</span>&lt;spark-image&gt; <span class="se">\</span>
<span class="nb">local</span>:///path/to/examples.jar
</code></pre></div></div>
<p>The Spark master, specified either via passing the <code class="language-plaintext highlighter-rouge">--master</code> command line argument to <code class="language-plaintext highlighter-rouge">spark-submit</code> or by setting
<code class="language-plaintext highlighter-rouge">spark.master</code> in the application&#8217;s configuration, must be a URL with the format <code class="language-plaintext highlighter-rouge">k8s://&lt;api_server_host&gt;:&lt;k8s-apiserver-port&gt;</code>. The port must always be specified, even if it&#8217;s the HTTPS port 443. Prefixing the
master string with <code class="language-plaintext highlighter-rouge">k8s://</code> will cause the Spark application to launch on the Kubernetes cluster, with the API server
being contacted at <code class="language-plaintext highlighter-rouge">api_server_host:k8s-apiserver-port</code>. If no HTTP protocol is specified in the URL, it defaults to <code class="language-plaintext highlighter-rouge">https</code>. For example,
setting the master to <code class="language-plaintext highlighter-rouge">k8s://example.com:443</code> is equivalent to setting it to <code class="language-plaintext highlighter-rouge">k8s://https://example.com:443</code>, but to
connect without TLS on a different port, the master would be set to <code class="language-plaintext highlighter-rouge">k8s://http://example.com:8080</code>.</p>
<p>In Kubernetes mode, the Spark application name that is specified by <code class="language-plaintext highlighter-rouge">spark.app.name</code> or the <code class="language-plaintext highlighter-rouge">--name</code> argument to
<code class="language-plaintext highlighter-rouge">spark-submit</code> is used by default to name the Kubernetes resources created like drivers and executors. So, application names
must consist of lower case alphanumeric characters, <code class="language-plaintext highlighter-rouge">-</code>, and <code class="language-plaintext highlighter-rouge">.</code> and must start and end with an alphanumeric character.</p>
<p>If you have a Kubernetes cluster setup, one way to discover the apiserver URL is by executing <code class="language-plaintext highlighter-rouge">kubectl cluster-info</code>.</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl cluster-info
Kubernetes master is running at http://127.0.0.1:6443
</code></pre></div></div>
<p>In the above example, the specific Kubernetes cluster can be used with <code>spark-submit</code> by specifying
<code class="language-plaintext highlighter-rouge">--master k8s://http://127.0.0.1:6443</code> as an argument to spark-submit. Additionally, it is also possible to use the
authenticating proxy, <code class="language-plaintext highlighter-rouge">kubectl proxy</code>, to communicate with the Kubernetes API.</p>
<p>The local proxy can be started by:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl proxy
</code></pre></div></div>
<p>If the local proxy is running at localhost:8001, <code class="language-plaintext highlighter-rouge">--master k8s://http://127.0.0.1:8001</code> can be used as the argument to
spark-submit. Finally, notice that in the above example we specify a jar with a specific URI with a scheme of <code class="language-plaintext highlighter-rouge">local://</code>.
This URI is the location of the example jar that is already in the Docker image.</p>
<h2 id="client-mode">Client Mode</h2>
<p>Starting with Spark 2.4.0, it is possible to run Spark applications on Kubernetes in client mode. When your application
runs in client mode, the driver can run inside a pod or on a physical host. When running an application in client mode,
it is recommended to account for the following factors:</p>
<h3 id="client-mode-networking">Client Mode Networking</h3>
<p>Spark executors must be able to connect to the Spark driver over a hostname and a port that is routable from the Spark
executors. The specific network configuration that will be required for Spark to work in client mode will vary per
setup. If you run your driver inside a Kubernetes pod, you can use a
<a href="https://kubernetes.io/docs/concepts/services-networking/service/#headless-services">headless service</a> to allow your
driver pod to be routable from the executors by a stable hostname. When deploying your headless service, ensure that
the service&#8217;s label selector will only match the driver pod and no other pods; it is recommended to assign your driver
pod a sufficiently unique label and to use that label in the label selector of the headless service. Specify the driver&#8217;s
hostname via <code class="language-plaintext highlighter-rouge">spark.driver.host</code> and your Spark driver&#8217;s port via <code class="language-plaintext highlighter-rouge">spark.driver.port</code>.</p>
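<p>As a rough sketch only (the service name, label, namespace placeholder, and port below are assumptions for illustration), a headless service selecting the driver pod and the matching driver configuration might look like:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>apiVersion: v1
kind: Service
metadata:
  name: spark-driver-svc          # hypothetical service name
spec:
  clusterIP: None                 # headless service
  selector:
    spark-driver-selector: my-app # label assumed to be set only on the driver pod
  ports:
  - name: driver-rpc
    port: 7078
    targetPort: 7078
</code></pre></div></div>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.driver.host=spark-driver-svc.&lt;namespace&gt;.svc.cluster.local
--conf spark.driver.port=7078
</code></pre></div></div>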
<h3 id="client-mode-executor-pod-garbage-collection">Client Mode Executor Pod Garbage Collection</h3>
<p>If you run your Spark driver in a pod, it is highly recommended to set <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.pod.name</code> to the name of that pod.
When this property is set, the Spark scheduler will deploy the executor pods with an
<a href="https://kubernetes.io/docs/concepts/workloads/controllers/garbage-collection/">OwnerReference</a>, which in turn will
ensure that once the driver pod is deleted from the cluster, all of the application&#8217;s executor pods will also be deleted.
The driver will look for a pod with the given name in the namespace specified by <code class="language-plaintext highlighter-rouge">spark.kubernetes.namespace</code>, and
an OwnerReference pointing to that pod will be added to each executor pod&#8217;s OwnerReferences list. Be careful to avoid
setting the OwnerReference to a pod that is not actually that driver pod, or else the executors may be terminated
prematurely when the wrong pod is deleted.</p>
<p>If your application is not running inside a pod, or if <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.pod.name</code> is not set when your application is
actually running in a pod, keep in mind that the executor pods may not be properly deleted from the cluster when the
application exits. The Spark scheduler attempts to delete these pods, but if the network request to the API server fails
for any reason, these pods will remain in the cluster. The executor processes should exit when they cannot reach the
driver, so the executor pods should not consume compute resources (cpu and memory) in the cluster after your application
exits.</p>
<p>You may use <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.podNamePrefix</code> to fully control the executor pod names.
When this property is set, it&#8217;s highly recommended to make it unique across all jobs in the same namespace.</p>
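<p>For example (the prefix value is illustrative):</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.executor.podNamePrefix=my-app-20250101
</code></pre></div></div>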
<h3 id="authentication-parameters">Authentication Parameters</h3>
<p>Use the exact prefix <code class="language-plaintext highlighter-rouge">spark.kubernetes.authenticate</code> for Kubernetes authentication parameters in client mode.</p>
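<p>For example (the file paths are placeholders), client-mode authentication can be supplied with properties under this prefix, such as:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.authenticate.caCertFile=/path/to/ca.crt
--conf spark.kubernetes.authenticate.oauthTokenFile=/path/to/token
</code></pre></div></div>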
<h2 id="ipv4-and-ipv6">IPv4 and IPv6</h2>
<p>Starting with 3.4.0, Spark additionally supports IPv6-only environments via the
<a href="https://kubernetes.io/docs/concepts/services-networking/dual-stack/">IPv4/IPv6 dual-stack network</a>
feature which enables the allocation of both IPv4 and IPv6 addresses to Pods and Services.
According to the K8s cluster capability, <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.service.ipFamilyPolicy</code> and
<code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.service.ipFamilies</code> can be one of <code class="language-plaintext highlighter-rouge">SingleStack</code>, <code class="language-plaintext highlighter-rouge">PreferDualStack</code>,
and <code class="language-plaintext highlighter-rouge">RequireDualStack</code> and one of <code class="language-plaintext highlighter-rouge">IPv4</code>, <code class="language-plaintext highlighter-rouge">IPv6</code>, <code class="language-plaintext highlighter-rouge">IPv4,IPv6</code>, and <code class="language-plaintext highlighter-rouge">IPv6,IPv4</code> respectively.
By default, Spark uses <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.service.ipFamilyPolicy=SingleStack</code> and
<code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.service.ipFamilies=IPv4</code>.</p>
<p>To use only <code class="language-plaintext highlighter-rouge">IPv6</code>, you can submit your jobs with the following.</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>...
<span class="nt">--conf</span> spark.kubernetes.driver.service.ipFamilies<span class="o">=</span>IPv6 <span class="se">\</span>
</code></pre></div></div>
<p>In a <code class="language-plaintext highlighter-rouge">DualStack</code> environment, you may additionally need <code class="language-plaintext highlighter-rouge">java.net.preferIPv6Addresses=true</code> for the JVM
and <code class="language-plaintext highlighter-rouge">SPARK_PREFER_IPV6=true</code> for Python in order to use <code class="language-plaintext highlighter-rouge">IPv6</code>.</p>
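<p>For instance, these settings could be passed as follows (a sketch, not a required configuration; whether all of them are needed depends on your environment):</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.driver.extraJavaOptions=-Djava.net.preferIPv6Addresses=true
--conf spark.executor.extraJavaOptions=-Djava.net.preferIPv6Addresses=true
--conf spark.executorEnv.SPARK_PREFER_IPV6=true
</code></pre></div></div>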
<h2 id="dependency-management">Dependency Management</h2>
<p>If your application&#8217;s dependencies are all hosted in remote locations like HDFS or HTTP servers, they may be referred to
by their appropriate remote URIs. Also, application dependencies can be pre-mounted into custom-built Docker images.
Those dependencies can be added to the classpath by referencing them with <code class="language-plaintext highlighter-rouge">local://</code> URIs and/or setting the
<code class="language-plaintext highlighter-rouge">SPARK_EXTRA_CLASSPATH</code> environment variable in your Dockerfiles. The <code class="language-plaintext highlighter-rouge">local://</code> scheme is also required when referring to
dependencies in custom-built Docker images in <code class="language-plaintext highlighter-rouge">spark-submit</code>. We support dependencies from the submission
client&#8217;s local file system using the <code class="language-plaintext highlighter-rouge">file://</code> scheme or without a scheme (using a full path), where the destination should be a Hadoop compatible filesystem.
A typical example of this using S3 is to pass the following options:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>...
--packages org.apache.hadoop:hadoop-aws:3.4.1
--conf spark.kubernetes.file.upload.path=s3a://&lt;s3-bucket&gt;/path
--conf spark.hadoop.fs.s3a.access.key=...
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
--conf spark.hadoop.fs.s3a.fast.upload=true
--conf spark.hadoop.fs.s3a.secret.key=....
--conf spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp
file:///full/path/to/app.jar
</code></pre></div></div>
<p>The app jar file will be uploaded to S3 and then, when the driver is launched, it will be downloaded
to the driver pod and added to its classpath. Spark will generate a subdirectory under the upload path with a random name
to avoid conflicts with Spark apps running in parallel. Users can manage the created subdirectories according to their needs.</p>
<p>The client scheme is supported for the application jar, and dependencies specified by properties <code class="language-plaintext highlighter-rouge">spark.jars</code>, <code class="language-plaintext highlighter-rouge">spark.files</code> and <code class="language-plaintext highlighter-rouge">spark.archives</code>.</p>
<p>Important: all client-side dependencies will be uploaded to the given path with a flat directory structure, so
file names must be unique, otherwise files will be overwritten. Also make sure that the default Ivy directory in the derived k8s image
has the required access rights, or modify the settings as above. The latter is also important if you use <code class="language-plaintext highlighter-rouge">--packages</code> in
cluster mode.</p>
<h2 id="secret-management">Secret Management</h2>
<p>Kubernetes <a href="https://kubernetes.io/docs/concepts/configuration/secret/">Secrets</a> can be used to provide credentials for a
Spark application to access secured services. To mount a user-specified secret into the driver container, users can use
the configuration property of the form <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.secrets.[SecretName]=&lt;mount path&gt;</code>. Similarly, the
configuration property of the form <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.secrets.[SecretName]=&lt;mount path&gt;</code> can be used to mount a
user-specified secret into the executor containers. Note that it is assumed that the secret to be mounted is in the same
namespace as that of the driver and executor pods. For example, to mount a secret named <code class="language-plaintext highlighter-rouge">spark-secret</code> onto the path
<code class="language-plaintext highlighter-rouge">/etc/secrets</code> in both the driver and executor containers, add the following options to the <code class="language-plaintext highlighter-rouge">spark-submit</code> command:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.driver.secrets.spark-secret=/etc/secrets
--conf spark.kubernetes.executor.secrets.spark-secret=/etc/secrets
</code></pre></div></div>
<p>To use a secret through an environment variable use the following options to the <code class="language-plaintext highlighter-rouge">spark-submit</code> command:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.driver.secretKeyRef.ENV_NAME=name:key
--conf spark.kubernetes.executor.secretKeyRef.ENV_NAME=name:key
</code></pre></div></div>
<h2 id="pod-template">Pod Template</h2>
<p>Kubernetes allows defining pods from <a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/#pod-templates">template files</a>.
Spark users can similarly use template files to define the driver or executor pod configurations that Spark configurations do not support.
To do so, specify the spark properties <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.podTemplateFile</code> and <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.podTemplateFile</code>
to point to files accessible to the <code class="language-plaintext highlighter-rouge">spark-submit</code> process.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.driver.podTemplateFile=s3a://bucket/driver.yml
--conf spark.kubernetes.executor.podTemplateFile=s3a://bucket/executor.yml
</code></pre></div></div>
<p>To allow the driver pod to access the executor pod template
file, the file will be automatically mounted onto a volume in the driver pod when it&#8217;s created.
Spark does not do any validation after unmarshalling these template files and relies on the Kubernetes API server for validation.</p>
<p>It is important to note that Spark is opinionated about certain pod configurations so there are values in the
pod template that will always be overwritten by Spark. Therefore, users of this feature should note that specifying
the pod template file only lets Spark start with a template pod instead of an empty pod during the pod-building process.
For details, see the <a href="#pod-template-properties">full list</a> of pod template values that will be overwritten by Spark.</p>
<p>Pod template files can also define multiple containers. In such cases, you can use the spark properties
<code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.podTemplateContainerName</code> and <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.podTemplateContainerName</code>
to indicate which container should be used as a basis for the driver or executor.
If not specified, or if the container name is not valid, Spark will assume that the first container in the list
will be the driver or executor container.</p>
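<p>As an illustration (the container names and sidecar image are assumptions), a template with a sidecar container might look like the following, with <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.podTemplateContainerName=spark-executor</code> pointing Spark at the intended container:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>apiVersion: v1
kind: Pod
spec:
  containers:
  - name: spark-executor      # container used by Spark as the basis for the executor
  - name: log-forwarder       # hypothetical sidecar left untouched by Spark
    image: fluent/fluent-bit  # illustrative image
</code></pre></div></div>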
<h2 id="using-kubernetes-volumes">Using Kubernetes Volumes</h2>
<p>Users can mount the following types of Kubernetes <a href="https://kubernetes.io/docs/concepts/storage/volumes/">volumes</a> into the driver and executor pods:</p>
<ul>
<li><a href="https://kubernetes.io/docs/concepts/storage/volumes/#hostpath">hostPath</a>: mounts a file or directory from the host node’s filesystem into a pod.</li>
<li><a href="https://kubernetes.io/docs/concepts/storage/volumes/#emptydir">emptyDir</a>: an initially empty volume created when a pod is assigned to a node.</li>
<li><a href="https://kubernetes.io/docs/concepts/storage/volumes/#nfs">nfs</a>: mounts an existing NFS(Network File System) into a pod.</li>
<li><a href="https://kubernetes.io/docs/concepts/storage/volumes/#persistentvolumeclaim">persistentVolumeClaim</a>: mounts a <code class="language-plaintext highlighter-rouge">PersistentVolume</code> into a pod.</li>
</ul>
<p><strong>NB:</strong> Please see the <a href="#security">Security</a> section of this document for security issues related to volume mounts.</p>
<p>To mount a volume of any of the types above into the driver pod, use the following configuration property:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.path=&lt;mount path&gt;
--conf spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.readOnly=&lt;true|false&gt;
--conf spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.subPath=&lt;mount subPath&gt;
</code></pre></div></div>
<p>Specifically, <code class="language-plaintext highlighter-rouge">VolumeType</code> can be one of the following values: <code class="language-plaintext highlighter-rouge">hostPath</code>, <code class="language-plaintext highlighter-rouge">emptyDir</code>, <code class="language-plaintext highlighter-rouge">nfs</code> and <code class="language-plaintext highlighter-rouge">persistentVolumeClaim</code>. <code class="language-plaintext highlighter-rouge">VolumeName</code> is the name you want to use for the volume under the <code class="language-plaintext highlighter-rouge">volumes</code> field in the pod specification.</p>
<p>Each supported type of volumes may have some specific configuration options, which can be specified using configuration properties of the following form:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].options.[OptionName]=&lt;value&gt;
</code></pre></div></div>
<p>For example, the server and path of a <code class="language-plaintext highlighter-rouge">nfs</code> with volume name <code class="language-plaintext highlighter-rouge">images</code> can be specified using the following properties:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.driver.volumes.nfs.images.options.server=example.com
spark.kubernetes.driver.volumes.nfs.images.options.path=/data
</code></pre></div></div>
<p>And, the claim name of a <code class="language-plaintext highlighter-rouge">persistentVolumeClaim</code> with volume name <code class="language-plaintext highlighter-rouge">checkpointpvc</code> can be specified using the following property:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.options.claimName=check-point-pvc-claim
</code></pre></div></div>
<p>The configuration properties for mounting volumes into the executor pods use prefix <code class="language-plaintext highlighter-rouge">spark.kubernetes.executor.</code> instead of <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.</code>.</p>
<p>For example, you can mount a dynamically-created persistent volume claim per executor by using <code class="language-plaintext highlighter-rouge">OnDemand</code> as a claim name and <code class="language-plaintext highlighter-rouge">storageClass</code> and <code class="language-plaintext highlighter-rouge">sizeLimit</code> options like the following. This is useful in case of <a href="configuration.html#dynamic-allocation">Dynamic Allocation</a>.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName=OnDemand
spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass=gp
spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit=500Gi
spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path=/data
spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly=false
</code></pre></div></div>
<p>For a complete list of available options for each supported type of volumes, please refer to the <a href="#spark-properties">Spark Properties</a> section below.</p>
<h3 id="pvc-oriented-executor-pod-allocation">PVC-oriented executor pod allocation</h3>
<p>Since disks are one of the important resource types, the Spark driver provides fine-grained control
via a set of configurations. For example, by default, on-demand PVCs are owned by executors and
the lifecycle of these PVCs is tightly coupled with their owner executors.
However, on-demand PVCs can be owned by the driver and reused by other executors during the Spark job&#8217;s
lifetime with the following options. This reduces the overhead of PVC creation and deletion.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.driver.ownPersistentVolumeClaim=true
spark.kubernetes.driver.reusePersistentVolumeClaim=true
</code></pre></div></div>
<p>In addition, since Spark 3.4, the Spark driver is able to do PVC-oriented executor allocation, which means
Spark counts the total number of created PVCs which the job can have, and holds off creating a new executor
if the driver already owns the maximum number of PVCs. This helps the transition of an existing PVC from one executor
to another executor.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.driver.waitToReusePersistentVolumeClaim=true
</code></pre></div></div>
<h2 id="local-storage">Local Storage</h2>
<p>Spark supports using volumes to spill data during shuffles and other operations. To use a volume as local storage, the volume&#8217;s name should start with <code class="language-plaintext highlighter-rouge">spark-local-dir-</code>, for example:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.driver.volumes.[VolumeType].spark-local-dir-[VolumeName].mount.path=&lt;mount path&gt;
--conf spark.kubernetes.driver.volumes.[VolumeType].spark-local-dir-[VolumeName].mount.readOnly=false
</code></pre></div></div>
<p>Specifically, you can use persistent volume claims if the jobs require large shuffle and sorting operations in executors.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName=OnDemand
spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass=gp
spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit=500Gi
spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data
spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false
</code></pre></div></div>
<p>To enable the shuffle data recovery feature via the built-in <code class="language-plaintext highlighter-rouge">KubernetesLocalDiskShuffleDataIO</code> plugin, the following configuration is needed. You may additionally want to enable <code class="language-plaintext highlighter-rouge">spark.kubernetes.driver.waitToReusePersistentVolumeClaim</code>.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data/spark-x/executor-x
spark.shuffle.sort.io.plugin.class=org.apache.spark.shuffle.KubernetesLocalDiskShuffleDataIO
</code></pre></div></div>
<p>If no volume is set as local storage, Spark uses temporary scratch space to spill data to disk during shuffles and other operations. When using Kubernetes as the resource manager, the pods will be created with an <a href="https://kubernetes.io/docs/concepts/storage/volumes/#emptydir">emptyDir</a> volume mounted for each directory listed in <code class="language-plaintext highlighter-rouge">spark.local.dir</code> or the environment variable <code class="language-plaintext highlighter-rouge">SPARK_LOCAL_DIRS</code>. If no directories are explicitly specified, then a default directory is created and configured appropriately.</p>
<p><code class="language-plaintext highlighter-rouge">emptyDir</code> volumes use the ephemeral storage feature of Kubernetes and do not persist beyond the life of the pod.</p>
<h3 id="using-ram-for-local-storage">Using RAM for local storage</h3>
<p><code class="language-plaintext highlighter-rouge">emptyDir</code> volumes use the node&#8217;s backing storage for ephemeral storage by default; this behaviour may not be appropriate for some compute environments. For example, if you have diskless nodes with remote storage mounted over a network, having lots of executors doing IO to this remote storage may actually degrade performance.</p>
<p>In this case it may be desirable to set <code class="language-plaintext highlighter-rouge">spark.kubernetes.local.dirs.tmpfs=true</code> in your configuration, which will cause the <code class="language-plaintext highlighter-rouge">emptyDir</code> volumes to be configured as <code class="language-plaintext highlighter-rouge">tmpfs</code>, i.e. RAM-backed, volumes. When configured like this, Spark&#8217;s local storage usage will count towards your pod&#8217;s memory usage, so you may wish to increase your memory requests by increasing the value of <code class="language-plaintext highlighter-rouge">spark.{driver,executor}.memoryOverheadFactor</code> as appropriate.</p>
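<p>For example (the overhead factor value is only illustrative):</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.local.dirs.tmpfs=true
--conf spark.driver.memoryOverheadFactor=0.3
--conf spark.executor.memoryOverheadFactor=0.3
</code></pre></div></div>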
<h2 id="introspection-and-debugging">Introspection and Debugging</h2>
<p>These are the different ways in which you can investigate a running/completed Spark application, monitor progress, and
take actions.</p>
<h3 id="accessing-logs">Accessing Logs</h3>
<p>Logs can be accessed using the Kubernetes API and the <code class="language-plaintext highlighter-rouge">kubectl</code> CLI. When a Spark application is running, it&#8217;s possible
to stream logs from the application using:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl <span class="nt">-n</span><span class="o">=</span>&lt;namespace&gt; logs <span class="nt">-f</span> &lt;driver-pod-name&gt;
</code></pre></div></div>
<p>The same logs can also be accessed through the
<a href="https://kubernetes.io/docs/tasks/access-application-cluster/web-ui-dashboard/">Kubernetes dashboard</a> if installed on
the cluster.</p>
<p>If a log collection system exists, you can expose it on the <code class="language-plaintext highlighter-rouge">Executors</code> tab of the Spark driver UI. For example,</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.ui.custom.executor.log.url='https://log-server/log?appId=&amp;execId='
</code></pre></div></div>
<p>You can add additional custom variables to this URL template, populated with the values of existing executor environment variables, like</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.executorEnv.SPARK_EXECUTOR_ATTRIBUTE_YOUR_VAR='$(EXISTING_EXECUTOR_ENV_VAR)'
spark.ui.custom.executor.log.url='https://log-server/log?appId=&amp;execId=&amp;your_var='
</code></pre></div></div>
<h3 id="accessing-driver-ui">Accessing Driver UI</h3>
<p>The UI associated with any application can be accessed locally using
<a href="https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/#forward-a-local-port-to-a-port-on-the-pod"><code class="language-plaintext highlighter-rouge">kubectl port-forward</code></a>.</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl port-forward &lt;driver-pod-name&gt; 4040:4040
</code></pre></div></div>
<p>Then, the Spark driver UI can be accessed on <code class="language-plaintext highlighter-rouge">http://localhost:4040</code>.</p>
<p>Since Apache Spark 4.0.0, the Driver UI provides a way to see driver logs via a new configuration.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.driver.log.localDir=/tmp
</code></pre></div></div>
<p>Then, the Spark driver logs can be accessed at <code class="language-plaintext highlighter-rouge">http://localhost:4040/logs/</code>.
Optionally, the log layout can be configured with the following.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>spark.driver.log.layout="%m%n%ex"
</code></pre></div></div>
<h3 id="debugging">Debugging</h3>
<p>There may be several kinds of failures. If the Kubernetes API server rejects the request made from spark-submit, or the
connection is refused for a different reason, the submission logic should indicate the error encountered. However, if there
are errors during the running of the application, often, the best way to investigate may be through the Kubernetes CLI.</p>
<p>To get some basic information about the scheduling decisions made around the driver pod, you can run:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl describe pod &lt;spark-driver-pod&gt;
</code></pre></div></div>
<p>If the pod has encountered a runtime error, the status can be probed further using:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl logs &lt;spark-driver-pod&gt;
</code></pre></div></div>
<p>Status and logs of failed executor pods can be checked in similar ways. Finally, deleting the driver pod will clean up the entire spark
application, including all executors, associated service, etc. The driver pod can be thought of as the Kubernetes representation of
the Spark application.</p>
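<p>For example, to tear down a finished or misbehaving application:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ kubectl delete pod &lt;spark-driver-pod&gt;
</code></pre></div></div>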
<h2 id="kubernetes-features">Kubernetes Features</h2>
<h3 id="configuration-file">Configuration File</h3>
<p>Your Kubernetes config file typically lives under <code class="language-plaintext highlighter-rouge">.kube/config</code> in your home directory or in a location specified by the <code class="language-plaintext highlighter-rouge">KUBECONFIG</code> environment variable. Spark on Kubernetes will attempt to use this file to do an initial auto-configuration of the Kubernetes client used to interact with the Kubernetes cluster. A variety of Spark configuration properties are provided that allow further customising the client configuration e.g. using an alternative authentication method.</p>
<h3 id="contexts">Contexts</h3>
<p>Kubernetes configuration files can contain multiple contexts that allow for switching between different clusters and/or user identities. By default Spark on Kubernetes will use your current context (which can be checked by running <code class="language-plaintext highlighter-rouge">kubectl config current-context</code>) when doing the initial auto-configuration of the Kubernetes client.</p>
<p>In order to use an alternative context users can specify the desired context via the Spark configuration property <code class="language-plaintext highlighter-rouge">spark.kubernetes.context</code> e.g. <code class="language-plaintext highlighter-rouge">spark.kubernetes.context=minikube</code>.</p>
<h3 id="namespaces">Namespaces</h3>
<p>Kubernetes has the concept of <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/">namespaces</a>.
Namespaces are ways to divide cluster resources between multiple users (via resource quota). Spark on Kubernetes can
use namespaces to launch Spark applications. This can be made use of through the <code class="language-plaintext highlighter-rouge">spark.kubernetes.namespace</code> configuration.</p>
<p>Kubernetes allows using <a href="https://kubernetes.io/docs/concepts/policy/resource-quotas/">ResourceQuota</a> to set limits on
resources, number of objects, etc. on individual namespaces. Namespaces and ResourceQuota can be used in combination by
administrators to control sharing and resource allocation in a Kubernetes cluster running Spark applications.</p>
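<p>A minimal sketch of such a quota (the namespace, name, and limits below are illustrative, and the namespace is assumed to match <code class="language-plaintext highlighter-rouge">spark.kubernetes.namespace</code>):</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>apiVersion: v1
kind: ResourceQuota
metadata:
  name: spark-quota       # illustrative name
  namespace: spark-jobs   # illustrative namespace used for Spark applications
spec:
  hard:
    pods: "50"
    requests.cpu: "100"
    requests.memory: 200Gi
</code></pre></div></div>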
<h3 id="rbac">RBAC</h3>
<p>In Kubernetes clusters with <a href="https://kubernetes.io/docs/reference/access-authn-authz/rbac/">RBAC</a> enabled, users can configure
Kubernetes RBAC roles and service accounts used by the various Spark on Kubernetes components to access the Kubernetes
API server.</p>
<p>The Spark driver pod uses a Kubernetes service account to access the Kubernetes API server to create and watch executor
pods. The service account used by the driver pod must have the appropriate permission for the driver to be able to do
its work. Specifically, at minimum, the service account must be granted a
<a href="https://kubernetes.io/docs/reference/access-authn-authz/rbac/#role-and-clusterrole"><code class="language-plaintext highlighter-rouge">Role</code> or <code class="language-plaintext highlighter-rouge">ClusterRole</code></a> that allows driver
pods to create pods and services. By default, the driver pod is automatically assigned the <code class="language-plaintext highlighter-rouge">default</code> service account in
the namespace specified by <code class="language-plaintext highlighter-rouge">spark.kubernetes.namespace</code>, if no service account is specified when the pod gets created.</p>
<p>Depending on the version and setup of Kubernetes deployed, this <code class="language-plaintext highlighter-rouge">default</code> service account may or may not have the role
that allows driver pods to create pods and services under the default Kubernetes
<a href="https://kubernetes.io/docs/reference/access-authn-authz/rbac/">RBAC</a> policies. Sometimes users may need to specify a custom
service account that has the right role granted. Spark on Kubernetes supports specifying a custom service account to
be used by the driver pod through the configuration property
<code class="language-plaintext highlighter-rouge">spark.kubernetes.authenticate.driver.serviceAccountName=&lt;service account name&gt;</code>. For example, to make the driver pod
use the <code class="language-plaintext highlighter-rouge">spark</code> service account, a user simply adds the following option to the <code class="language-plaintext highlighter-rouge">spark-submit</code> command:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark
</code></pre></div></div>
<p>To create a custom service account, a user can use the <code class="language-plaintext highlighter-rouge">kubectl create serviceaccount</code> command. For example, the
following command creates a service account named <code class="language-plaintext highlighter-rouge">spark</code>:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl create serviceaccount spark
</code></pre></div></div>
<p>To grant a service account a <code class="language-plaintext highlighter-rouge">Role</code> or <code class="language-plaintext highlighter-rouge">ClusterRole</code>, a <code class="language-plaintext highlighter-rouge">RoleBinding</code> or <code class="language-plaintext highlighter-rouge">ClusterRoleBinding</code> is needed. To create
a <code class="language-plaintext highlighter-rouge">RoleBinding</code> or <code class="language-plaintext highlighter-rouge">ClusterRoleBinding</code>, a user can use the <code class="language-plaintext highlighter-rouge">kubectl create rolebinding</code> (or <code class="language-plaintext highlighter-rouge">clusterrolebinding</code>
for <code class="language-plaintext highlighter-rouge">ClusterRoleBinding</code>) command. For example, the following command creates an <code class="language-plaintext highlighter-rouge">edit</code> <code class="language-plaintext highlighter-rouge">ClusterRole</code> in the <code class="language-plaintext highlighter-rouge">default</code>
namespace and grants it to the <code class="language-plaintext highlighter-rouge">spark</code> service account created above:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>kubectl create clusterrolebinding spark-role <span class="nt">--clusterrole</span><span class="o">=</span>edit <span class="nt">--serviceaccount</span><span class="o">=</span>default:spark <span class="nt">--namespace</span><span class="o">=</span>default
</code></pre></div></div>
<p>Note that a <code class="language-plaintext highlighter-rouge">Role</code> can only be used to grant access to resources (like pods) within a single namespace, whereas a
<code class="language-plaintext highlighter-rouge">ClusterRole</code> can be used to grant access to cluster-scoped resources (like nodes) as well as namespaced resources
(like pods) across all namespaces. For Spark on Kubernetes, since the driver always creates executor pods in the
same namespace, a <code class="language-plaintext highlighter-rouge">Role</code> is sufficient, although users may use a <code class="language-plaintext highlighter-rouge">ClusterRole</code> instead. For more information on
RBAC authorization and how to configure Kubernetes service accounts for pods, please refer to
<a href="https://kubernetes.io/docs/reference/access-authn-authz/rbac/">Using RBAC Authorization</a> and
<a href="https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/">Configure Service Accounts for Pods</a>.</p>
<h2 id="spark-application-management">Spark Application Management</h2>
<p>Spark on Kubernetes provides simple application management via the spark-submit CLI tool in cluster mode.
Users can kill a job by providing the submission ID that is printed when submitting their job.
The submission ID follows the format <code class="language-plaintext highlighter-rouge">namespace:driver-pod-name</code>.
If the user omits the namespace, then the namespace set in the current k8s context is used.
For example, if the user has set a specific namespace as follows, <code class="language-plaintext highlighter-rouge">kubectl config set-context minikube --namespace=spark</code>,
then the <code class="language-plaintext highlighter-rouge">spark</code> namespace will be used by default. On the other hand, if there is no namespace added to the specific context,
then all namespaces will be considered by default. That means operations will affect all Spark applications matching the given submission ID, regardless of namespace.
Moreover, spark-submit for application management uses the same backend code that is used for submitting the driver, so the same properties,
such as <code class="language-plaintext highlighter-rouge">spark.kubernetes.context</code>, can be re-used.</p>
<p>For example:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>spark-submit <span class="nt">--kill</span> spark:spark-pi-1547948636094-driver <span class="nt">--master</span> k8s://https://192.168.2.8:8443
</code></pre></div></div>
<p>Users also can list the application status by using the <code class="language-plaintext highlighter-rouge">--status</code> flag:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>spark-submit <span class="nt">--status</span> spark:spark-pi-1547948636094-driver <span class="nt">--master</span> k8s://https://192.168.2.8:8443
</code></pre></div></div>
<p>Both operations support glob patterns. For example, the user can run:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nv">$ </span>spark-submit <span class="nt">--kill</span> spark:spark-pi<span class="k">*</span> <span class="nt">--master</span> k8s://https://192.168.2.8:8443
</code></pre></div></div>
<p>The above will kill all applications with the given prefix.</p>
<p>Users can specify the grace period for pod termination via the <code class="language-plaintext highlighter-rouge">spark.kubernetes.appKillPodDeletionGracePeriod</code> property,
providing it with <code class="language-plaintext highlighter-rouge">--conf</code> (the default value for all K8s pods is <a href="https://kubernetes.io/docs/concepts/workloads/pods/pod">30 seconds</a>), as in the sketch below.</p>
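<p>For example, combining the kill operation shown earlier with a custom grace period (the 60-second value is purely illustrative):</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ spark-submit --kill spark:spark-pi-1547948636094-driver \
    --master k8s://https://192.168.2.8:8443 \
    --conf spark.kubernetes.appKillPodDeletionGracePeriod=60
</code></pre></div></div>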
<h2 id="future-work">Future Work</h2>
<p>There are several Spark on Kubernetes features that are currently being worked on or planned to be worked on. Those features are expected to eventually make it into future versions of the spark-kubernetes integration.</p>
<p>Some of these include:</p>
<ul>
<li>External Shuffle Service</li>
<li>Job Queues and Resource Management</li>
</ul>
<h1 id="configuration">Configuration</h1>
<p>See the <a href="configuration.html">configuration page</a> for information on Spark configurations. The following configurations are specific to Spark on Kubernetes.</p>
<h4 id="spark-properties">Spark Properties</h4>
<table class="spark-config">
<thead><tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr></thead>
<tr>
<td><code>spark.kubernetes.context</code></td>
<td><code>(none)</code></td>
<td>
The context from the user's Kubernetes configuration file used for the initial
auto-configuration of the Kubernetes client library. When not specified, the
user's current context is used. <strong>NB:</strong> Many of the
auto-configured settings can be overridden by the use of other Spark
configuration properties, e.g. <code>spark.kubernetes.namespace</code>.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.master</code></td>
<td><code>https://kubernetes.default.svc</code></td>
<td>
The internal Kubernetes master (API server) address to be used by the driver to request executors, or
'local[*]' for driver-pod-only mode.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.namespace</code></td>
<td><code>default</code></td>
<td>
The namespace that will be used for running the driver and executor pods.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.container.image</code></td>
<td><code>(none)</code></td>
<td>
Container image to use for the Spark application.
This is usually of the form <code>example.com/repo/spark:v1.0.0</code>.
This configuration is required and must be provided by the user, unless explicit
images are provided for each different container type.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.container.image</code></td>
<td><code>(value of spark.kubernetes.container.image)</code></td>
<td>
Custom container image to use for the driver.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.container.image</code></td>
<td><code>(value of spark.kubernetes.container.image)</code></td>
<td>
Custom container image to use for executors.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.container.image.pullPolicy</code></td>
<td><code>IfNotPresent</code></td>
<td>
Container image pull policy used when pulling images within Kubernetes.
Valid values are <code>Always</code>, <code>Never</code>, and <code>IfNotPresent</code>.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.container.image.pullSecrets</code></td>
<td><code></code></td>
<td>
Comma separated list of Kubernetes secrets used to pull images from private image registries.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.batch.size</code></td>
<td><code>10</code></td>
<td>
Number of pods to launch at once in each round of executor pod allocation.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.batch.delay</code></td>
<td><code>1s</code></td>
<td>
Time to wait between each round of executor pod allocation. Specifying values less than 1 second may lead to
excessive CPU usage on the spark driver.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.jars.avoidDownloadSchemes</code></td>
<td><code>(none)</code></td>
<td>
Comma-separated list of schemes for which jars will NOT be downloaded to the
driver's local disk prior to being distributed to executors; this applies only to Kubernetes deployments.
It is intended for cases where the jars are big and executor counts are high, so that
concurrent downloads would cause network saturation and timeouts.
The wildcard '*' means jars are not downloaded for any scheme.
</td>
<td>4.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.submission.caCertFile</code></td>
<td>(none)</td>
<td>
Path to the CA cert file for connecting to the Kubernetes API server over TLS when starting the driver. This file
must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not provide
a scheme). In client mode, use <code>spark.kubernetes.authenticate.caCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.submission.clientKeyFile</code></td>
<td>(none)</td>
<td>
Path to the client key file for authenticating against the Kubernetes API server when starting the driver. This file
must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not provide
a scheme). In client mode, use <code>spark.kubernetes.authenticate.clientKeyFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.submission.clientCertFile</code></td>
<td>(none)</td>
<td>
Path to the client cert file for authenticating against the Kubernetes API server when starting the driver. This
file must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not
provide a scheme). In client mode, use <code>spark.kubernetes.authenticate.clientCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.submission.oauthToken</code></td>
<td>(none)</td>
<td>
OAuth token to use when authenticating against the Kubernetes API server when starting the driver. Note
that unlike the other authentication options, this is expected to be the exact string value of the token to use for
the authentication. In client mode, use <code>spark.kubernetes.authenticate.oauthToken</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.submission.oauthTokenFile</code></td>
<td>(none)</td>
<td>
Path to the OAuth token file containing the token to use when authenticating against the Kubernetes API server when starting the driver.
This file must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not
provide a scheme). In client mode, use <code>spark.kubernetes.authenticate.oauthTokenFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.caCertFile</code></td>
<td>(none)</td>
<td>
Path to the CA cert file for connecting to the Kubernetes API server over TLS from the driver pod when requesting
executors. This file must be located on the submitting machine's disk, and will be uploaded to the driver pod.
Specify this as a path as opposed to a URI (i.e. do not provide a scheme). In client mode, use
<code>spark.kubernetes.authenticate.caCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.clientKeyFile</code></td>
<td>(none)</td>
<td>
Path to the client key file for authenticating against the Kubernetes API server from the driver pod when requesting
executors. This file must be located on the submitting machine's disk, and will be uploaded to the driver pod as
a Kubernetes secret. Specify this as a path as opposed to a URI (i.e. do not provide a scheme).
In client mode, use <code>spark.kubernetes.authenticate.clientKeyFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.clientCertFile</code></td>
<td>(none)</td>
<td>
Path to the client cert file for authenticating against the Kubernetes API server from the driver pod when
requesting executors. This file must be located on the submitting machine's disk, and will be uploaded to the
driver pod as a Kubernetes secret. Specify this as a path as opposed to a URI (i.e. do not provide a scheme).
In client mode, use <code>spark.kubernetes.authenticate.clientCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.oauthToken</code></td>
<td>(none)</td>
<td>
OAuth token to use when authenticating against the Kubernetes API server from the driver pod when
requesting executors. Note that unlike the other authentication options, this must be the exact string value of
the token to use for the authentication. This token value is uploaded to the driver pod as a Kubernetes secret.
In client mode, use <code>spark.kubernetes.authenticate.oauthToken</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.oauthTokenFile</code></td>
<td>(none)</td>
<td>
Path to the OAuth token file containing the token to use when authenticating against the Kubernetes API server from the driver pod when
requesting executors. Note that unlike the other authentication options, this file must contain the exact string value of
the token to use for the authentication. This token value is uploaded to the driver pod as a secret. In client mode, use
<code>spark.kubernetes.authenticate.oauthTokenFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.mounted.caCertFile</code></td>
<td>(none)</td>
<td>
Path to the CA cert file for connecting to the Kubernetes API server over TLS from the driver pod when requesting
executors. This path must be accessible from the driver pod.
Specify this as a path as opposed to a URI (i.e. do not provide a scheme). In client mode, use
<code>spark.kubernetes.authenticate.caCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.mounted.clientKeyFile</code></td>
<td>(none)</td>
<td>
Path to the client key file for authenticating against the Kubernetes API server from the driver pod when requesting
executors. This path must be accessible from the driver pod.
Specify this as a path as opposed to a URI (i.e. do not provide a scheme). In client mode, use
<code>spark.kubernetes.authenticate.clientKeyFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.mounted.clientCertFile</code></td>
<td>(none)</td>
<td>
Path to the client cert file for authenticating against the Kubernetes API server from the driver pod when
requesting executors. This path must be accessible from the driver pod.
Specify this as a path as opposed to a URI (i.e. do not provide a scheme). In client mode, use
<code>spark.kubernetes.authenticate.clientCertFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.mounted.oauthTokenFile</code></td>
<td>(none)</td>
<td>
Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server from the driver pod when
requesting executors. This path must be accessible from the driver pod.
Note that unlike the other authentication options, this file must contain the exact string value of the token to use
for the authentication. In client mode, use <code>spark.kubernetes.authenticate.oauthTokenFile</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.driver.serviceAccountName</code></td>
<td><code>default</code></td>
<td>
Service account that is used when running the driver pod. The driver pod uses this service account when requesting
executor pods from the API server. Note that this cannot be specified alongside a CA cert file, client key file,
client cert file, and/or OAuth token. In client mode, use <code>spark.kubernetes.authenticate.serviceAccountName</code> instead.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.executor.serviceAccountName</code></td>
<td><code>(value of spark.kubernetes.authenticate.driver.serviceAccountName)</code></td>
<td>
Service account that is used when running the executor pod.
If this parameter is not set, the fallback logic will use the driver's service account.
</td>
<td>3.1.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.caCertFile</code></td>
<td>(none)</td>
<td>
In client mode, path to the CA cert file for connecting to the Kubernetes API server over TLS when
requesting executors. Specify this as a path as opposed to a URI (i.e. do not provide a scheme).
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.clientKeyFile</code></td>
<td>(none)</td>
<td>
In client mode, path to the client key file for authenticating against the Kubernetes API server
when requesting executors. Specify this as a path as opposed to a URI (i.e. do not provide a scheme).
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.clientCertFile</code></td>
<td>(none)</td>
<td>
In client mode, path to the client cert file for authenticating against the Kubernetes API server
when requesting executors. Specify this as a path as opposed to a URI (i.e. do not provide a scheme).
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.oauthToken</code></td>
<td>(none)</td>
<td>
In client mode, the OAuth token to use when authenticating against the Kubernetes API server when
requesting executors. Note that unlike the other authentication options, this must be the exact string value of
the token to use for the authentication.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.authenticate.oauthTokenFile</code></td>
<td>(none)</td>
<td>
In client mode, path to the file containing the OAuth token to use when authenticating against the Kubernetes API
server when requesting executors.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.label.[LabelName]</code></td>
<td>(none)</td>
<td>
Add the label specified by <code>LabelName</code> to the driver pod.
For example, <code>spark.kubernetes.driver.label.something=true</code>.
Note that Spark also adds its own labels to the driver pod
for bookkeeping purposes.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.annotation.[AnnotationName]</code></td>
<td>(none)</td>
<td>
Add the Kubernetes <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/">annotation</a> specified by <code>AnnotationName</code> to the driver pod.
For example, <code>spark.kubernetes.driver.annotation.something=true</code>.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.service.label.[LabelName]</code></td>
<td>(none)</td>
<td>
Add the Kubernetes <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/">label</a> specified by <code>LabelName</code> to the driver service.
For example, <code>spark.kubernetes.driver.service.label.something=true</code>.
Note that Spark also adds its own labels to the driver service
for bookkeeping purposes.
</td>
<td>3.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.service.annotation.[AnnotationName]</code></td>
<td>(none)</td>
<td>
Add the Kubernetes <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/">annotation</a> specified by <code>AnnotationName</code> to the driver service.
For example, <code>spark.kubernetes.driver.service.annotation.something=true</code>.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.label.[LabelName]</code></td>
<td>(none)</td>
<td>
Add the label specified by <code>LabelName</code> to the executor pods.
For example, <code>spark.kubernetes.executor.label.something=true</code>.
Note that Spark also adds its own labels to the executor pod
for bookkeeping purposes.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.annotation.[AnnotationName]</code></td>
<td>(none)</td>
<td>
Add the Kubernetes <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/">annotation</a> specified by <code>AnnotationName</code> to the executor pods.
For example, <code>spark.kubernetes.executor.annotation.something=true</code>.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.pod.name</code></td>
<td>(none)</td>
<td>
Name of the driver pod. In cluster mode, if this is not set, the driver pod name is set to "spark.app.name"
suffixed by the current timestamp to avoid name conflicts. In client mode, if your application is running
inside a pod, it is highly recommended to set this to the name of the pod your driver is running in. Setting this
value in client mode allows the driver to become the owner of its executor pods, which in turn allows the executor
pods to be garbage collected by the cluster.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.podNamePrefix</code></td>
<td>(none)</td>
<td>
Prefix to use in front of the executor pod names. It must conform to the rules defined by the Kubernetes
<a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names">DNS Label Names</a>.
The prefix will be used to generate executor pod names in the form of <code>$podNamePrefix-exec-$id</code>, where the <code>id</code> is
a positive int value, so the length of the <code>podNamePrefix</code> needs to be less than or equal to 47 (= 63 - 10 - 6).
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.submission.waitAppCompletion</code></td>
<td><code>true</code></td>
<td>
In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to
false, the launcher has a "fire-and-forget" behavior when launching the Spark job.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.report.interval</code></td>
<td><code>1s</code></td>
<td>
Interval between reports of the current Spark job status in cluster mode.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.apiPollingInterval</code></td>
<td><code>30s</code></td>
<td>
Interval between polls against the Kubernetes API server to inspect the state of executors.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.request.cores</code></td>
<td>(none)</td>
<td>
Specify the cpu request for the driver pod. Values conform to the Kubernetes <a href="https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu">convention</a>.
Example values include 0.1, 500m, 1.5, 5, etc., with the definition of cpu units documented in <a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units">CPU units</a>.
This takes precedence over <code>spark.driver.cores</code> for specifying the driver pod cpu request if set.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.limit.cores</code></td>
<td>(none)</td>
<td>
Specify a hard cpu <a href="https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container">limit</a> for the driver pod.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.request.cores</code></td>
<td>(none)</td>
<td>
Specify the cpu request for each executor pod. Values conform to the Kubernetes <a href="https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu">convention</a>.
Example values include 0.1, 500m, 1.5, 5, etc., with the definition of cpu units documented in <a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units">CPU units</a>.
This is distinct from <code>spark.executor.cores</code>: it is used only for specifying the executor pod cpu request and, if set, takes precedence over <code>spark.executor.cores</code>. Task
parallelism, e.g. the number of tasks an executor can run concurrently, is not affected by this.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.limit.cores</code></td>
<td>(none)</td>
<td>
Specify a hard cpu <a href="https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container">limit</a> for each executor pod launched for the Spark Application.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.node.selector.[labelKey]</code></td>
<td>(none)</td>
<td>
Adds to the node selector of the driver pod and executor pods, with key <code>labelKey</code> and the value as the
configuration's value. For example, setting <code>spark.kubernetes.node.selector.identifier</code> to <code>myIdentifier</code>
will result in the driver pod and executors having a node selector with key <code>identifier</code> and value
<code>myIdentifier</code>. Multiple node selector keys can be added by setting multiple configurations with this prefix.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.node.selector.[labelKey]</code></td>
<td>(none)</td>
<td>
Adds to the driver node selector of the driver pod, with key <code>labelKey</code> and the value as the
configuration's value. For example, setting <code>spark.kubernetes.driver.node.selector.identifier</code> to <code>myIdentifier</code>
will result in the driver pod having a node selector with key <code>identifier</code> and value
<code>myIdentifier</code>. Multiple driver node selector keys can be added by setting multiple configurations with this prefix.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.node.selector.[labelKey]</code></td>
<td>(none)</td>
<td>
Adds to the executor node selector of the executor pods, with key <code>labelKey</code> and the value as the
configuration's value. For example, setting <code>spark.kubernetes.executor.node.selector.identifier</code> to <code>myIdentifier</code>
will result in the executors having a node selector with key <code>identifier</code> and value
<code>myIdentifier</code>. Multiple executor node selector keys can be added by setting multiple configurations with this prefix.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driverEnv.[EnvironmentVariableName]</code></td>
<td>(none)</td>
<td>
Add the environment variable specified by <code>EnvironmentVariableName</code> to
the Driver process. The user can specify multiple of these to set multiple environment variables.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.secrets.[SecretName]</code></td>
<td>(none)</td>
<td>
Add the <a href="https://kubernetes.io/docs/concepts/configuration/secret/">Kubernetes Secret</a> named <code>SecretName</code> to the driver pod on the path specified in the value. For example,
<code>spark.kubernetes.driver.secrets.spark-secret=/etc/secrets</code>.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.secrets.[SecretName]</code></td>
<td>(none)</td>
<td>
Add the <a href="https://kubernetes.io/docs/concepts/configuration/secret/">Kubernetes Secret</a> named <code>SecretName</code> to the executor pod on the path specified in the value. For example,
<code>spark.kubernetes.executor.secrets.spark-secret=/etc/secrets</code>.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.secretKeyRef.[EnvName]</code></td>
<td>(none)</td>
<td>
Add an environment variable with the name EnvName (case sensitive) to the driver container, whose value is referenced by the key <code>key</code> in the data of the referenced <a href="https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets-as-environment-variables">Kubernetes Secret</a>. For example,
<code>spark.kubernetes.driver.secretKeyRef.ENV_VAR=spark-secret:key</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.secretKeyRef.[EnvName]</code></td>
<td>(none)</td>
<td>
Add an environment variable with the name EnvName (case sensitive) to the executor container, whose value is referenced by the key <code>key</code> in the data of the referenced <a href="https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets-as-environment-variables">Kubernetes Secret</a>. For example,
<code>spark.kubernetes.executor.secretKeyRef.ENV_VAR=spark-secret:key</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.path</code></td>
<td>(none)</td>
<td>
Add the <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> named <code>VolumeName</code> of the <code>VolumeType</code> type to the driver pod on the path specified in the value. For example,
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.mount.path=/checkpoint</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.subPath</code></td>
<td>(none)</td>
<td>
Specifies a <a href="https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath">subpath</a> to be mounted from the volume into the driver pod.
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.mount.subPath=checkpoint</code>.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.readOnly</code></td>
<td>(none)</td>
<td>
Specify if the mounted volume is read only or not. For example,
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.mount.readOnly=false</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].options.[OptionName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> options passed to the Kubernetes with <code>OptionName</code> as key having specified value, must conform with Kubernetes option format. For example,
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.options.claimName=spark-pvc-claim</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].label.[LabelName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> labels passed to the Kubernetes with <code>LabelName</code> as key having specified value, must conform with Kubernetes label format. For example,
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.label.foo=bar</code>.
</td>
<td>4.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].annotation.[AnnotationName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> annotations passed to the Kubernetes with <code>AnnotationName</code> as key having specified value, must conform with Kubernetes annotations format. For example,
<code>spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.annotation.foo=bar</code>.
</td>
<td>4.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].mount.path</code></td>
<td>(none)</td>
<td>
Add the <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> named <code>VolumeName</code> of the <code>VolumeType</code> type to the executor pod on the path specified in the value. For example,
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.mount.path=/checkpoint</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].mount.subPath</code></td>
<td>(none)</td>
<td>
Specifies a <a href="https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath">subpath</a> to be mounted from the volume into the executor pod.
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.mount.subPath=checkpoint</code>.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].mount.readOnly</code></td>
<td>false</td>
<td>
Specify if the mounted volume is read only or not. For example,
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.mount.readOnly=false</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].options.[OptionName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> options passed to the Kubernetes with <code>OptionName</code> as key having specified value. For example,
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.options.claimName=spark-pvc-claim</code>.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].label.[LabelName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> labels passed to the Kubernetes with <code>LabelName</code> as key having specified value, must conform with Kubernetes label format. For example,
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.label.foo=bar</code>.
</td>
<td>4.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.volumes.[VolumeType].[VolumeName].annotation.[AnnotationName]</code></td>
<td>(none)</td>
<td>
Configure <a href="https://kubernetes.io/docs/concepts/storage/volumes/">Kubernetes Volume</a> annotations passed to the Kubernetes with <code>AnnotationName</code> as key having specified value, must conform with Kubernetes annotations format. For example,
<code>spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointpvc.annotation.foo=bar</code>.
</td>
<td>4.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.local.dirs.tmpfs</code></td>
<td><code>false</code></td>
<td>
Configure the <code>emptyDir</code> volumes used to back <code>SPARK_LOCAL_DIRS</code> within the Spark driver and executor pods to use <code>tmpfs</code> backing i.e. RAM. See <a href="#local-storage">Local Storage</a> earlier on this page
for more discussion of this.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.memoryOverheadFactor</code></td>
<td><code>0.1</code></td>
<td>
This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various system processes, and <code>tmpfs</code>-based local directories when <code>spark.kubernetes.local.dirs.tmpfs</code> is <code>true</code>. This value defaults to 0.10 for JVM-based jobs and to 0.40 for non-JVM jobs.
This is done as non-JVM tasks need more non-JVM memory and such tasks commonly fail with "Memory Overhead Exceeded" errors; the higher default preempts this error.
This is overridden if <code>spark.driver.memoryOverheadFactor</code> or <code>spark.executor.memoryOverheadFactor</code> is set explicitly.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.pyspark.pythonVersion</code></td>
<td><code>"3"</code></td>
<td>
This sets the major Python version of the docker image used to run the driver and executor containers.
It can be only "3". This configuration was deprecated from Spark 3.1.0, and is effectively no-op.
Users should set 'spark.pyspark.python' and 'spark.pyspark.driver.python' configurations or
'PYSPARK_PYTHON' and 'PYSPARK_DRIVER_PYTHON' environment variables.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.krb5.path</code></td>
<td><code>(none)</code></td>
<td>
Specify the local location of the krb5.conf file to be mounted on the driver and executors for Kerberos interaction.
It is important to note that the KDC defined needs to be visible from inside the containers.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.krb5.configMapName</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the ConfigMap, containing the krb5.conf file, to be mounted on the driver and executors
for Kerberos interaction. The KDC defined needs to be visible from inside the containers. The ConfigMap must also
be in the same namespace of the driver and executor pods.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.hadoop.configMapName</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the ConfigMap, containing the HADOOP_CONF_DIR files, to be mounted on the driver
and executors for custom Hadoop configuration.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokenSecret.name</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the secret where your existing delegation tokens are stored. This removes the need for the job user
to provide any kerberos credentials for launching a job.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokenSecret.itemKey</code></td>
<td><code>(none)</code></td>
<td>
Specify the item key of the data where your existing delegation tokens are stored. This removes the need for the job user
to provide any kerberos credentials for launching a job.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.podTemplateFile</code></td>
<td>(none)</td>
<td>
Specify the local file that contains the driver <a href="#pod-template">pod template</a>. For example
<code>spark.kubernetes.driver.podTemplateFile=/path/to/driver-pod-template.yaml</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.podTemplateContainerName</code></td>
<td>(none)</td>
<td>
Specify the container name to be used as a basis for the driver in the given <a href="#pod-template">pod template</a>.
For example <code>spark.kubernetes.driver.podTemplateContainerName=spark-driver</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.podTemplateFile</code></td>
<td>(none)</td>
<td>
Specify the local file that contains the executor <a href="#pod-template">pod template</a>. For example
<code>spark.kubernetes.executor.podTemplateFile=/path/to/executor-pod-template.yaml</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.podTemplateContainerName</code></td>
<td>(none)</td>
<td>
Specify the container name to be used as a basis for the executor in the given <a href="#pod-template">pod template</a>.
For example <code>spark.kubernetes.executor.podTemplateContainerName=spark-executor</code>
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.deleteOnTermination</code></td>
<td>true</td>
<td>
Specify whether executor pods should be deleted in case of failure or normal termination.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.checkAllContainers</code></td>
<td><code>true</code></td>
<td>
Specify whether all containers in executor pods (including sidecars) or only the executor container should be checked when determining the pod status.
</td>
<td>3.1.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.submission.connectionTimeout</code></td>
<td><code>10000</code></td>
<td>
Connection timeout in milliseconds for the kubernetes client to use for starting the driver.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.submission.requestTimeout</code></td>
<td><code>10000</code></td>
<td>
Request timeout in milliseconds for the kubernetes client to use for starting the driver.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.trust.certificates</code></td>
<td><code>false</code></td>
<td>
If set to true, the client can submit to the Kubernetes cluster with only a token.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.connectionTimeout</code></td>
<td><code>10000</code></td>
<td>
Connection timeout in milliseconds for the kubernetes client in driver to use when requesting executors.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.requestTimeout</code></td>
<td><code>10000</code></td>
<td>
Request timeout in milliseconds for the kubernetes client in driver to use when requesting executors.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.appKillPodDeletionGracePeriod</code></td>
<td>(none)</td>
<td>
Specify the grace period in seconds when deleting a Spark application using spark-submit.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.dynamicAllocation.deleteGracePeriod</code></td>
<td><code>5s</code></td>
<td>
How long to wait for executors to shut down gracefully before a forceful kill.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.file.upload.path</code></td>
<td>(none)</td>
<td>
Path to store files on the spark-submit side in cluster mode. For example:
<code>spark.kubernetes.file.upload.path=s3a://&lt;s3-bucket&gt;/path</code>.
Files should be specified as <code>file://path/to/file</code> or as an absolute path.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.decommissionLabel</code></td>
<td>(none)</td>
<td>
Label to be applied to pods which are exiting or being decommissioned. Intended for use
with pod disruption budgets, deletion costs, and similar.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.decommissionLabelValue</code></td>
<td>(none)</td>
<td>
Value to be applied with the label when
<code>spark.kubernetes.executor.decommissionLabel</code> is enabled.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.scheduler.name</code></td>
<td>(none)</td>
<td>
Specify the scheduler name for each executor pod.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.scheduler.name</code></td>
<td>(none)</td>
<td>
Specify the scheduler name for the driver pod.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.scheduler.name</code></td>
<td>(none)</td>
<td>
Specify the scheduler name for driver and executor pods. If <code>spark.kubernetes.driver.scheduler.name</code> or
<code>spark.kubernetes.executor.scheduler.name</code> is set, it will override this.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.configMap.maxSize</code></td>
<td><code>1048576</code></td>
<td>
Max size limit for a config map.
This is configurable as per the <a href="https://etcd.io/docs/latest/dev-guide/limit/">limit</a> on the k8s server end.
</td>
<td>3.1.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.missingPodDetectDelta</code></td>
<td><code>30s</code></td>
<td>
When a registered executor's POD is missing from the Kubernetes API server's polled
list of PODs then this delta time is taken as the accepted time difference between the
registration time and the time of the polling. After this time the POD is considered
missing from the cluster and the executor will be removed.
</td>
<td>3.1.1</td>
</tr>
<tr>
<td><code>spark.kubernetes.decommission.script</code></td>
<td><code>/opt/decom.sh</code></td>
<td>
The location of the script to use for graceful decommissioning.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.service.deleteOnTermination</code></td>
<td><code>true</code></td>
<td>
If true, the driver service will be deleted on Spark application termination. If false, it will be cleaned up when the driver pod is deleted.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.service.ipFamilyPolicy</code></td>
<td><code>SingleStack</code></td>
<td>
K8s IP Family Policy for Driver Service. Valid values are
<code>SingleStack</code>, <code>PreferDualStack</code>, and <code>RequireDualStack</code>.
</td>
<td>3.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.service.ipFamilies</code></td>
<td><code>IPv4</code></td>
<td>
A list of IP families for K8s Driver Service. Valid values are
<code>IPv4</code> and <code>IPv6</code>.
</td>
<td>3.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.ownPersistentVolumeClaim</code></td>
<td><code>true</code></td>
<td>
If true, the driver pod becomes the owner of on-demand persistent volume claims instead of the executor pods.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.reusePersistentVolumeClaim</code></td>
<td><code>true</code></td>
<td>
If true, the driver pod tries to reuse driver-owned on-demand persistent volume claims
of deleted executor pods if they exist. This can be useful to reduce executor pod
creation delay by skipping persistent volume creations. Note that a pod in the
<code>Terminating</code> pod status is not a deleted pod by definition and its resources,
including persistent volume claims, are not reusable yet. Spark will create new
persistent volume claims when no reusable one exists. In other words, the total
number of persistent volume claims can sometimes be larger than the number of running
executors. This config requires <code>spark.kubernetes.driver.ownPersistentVolumeClaim=true</code>.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.waitToReusePersistentVolumeClaim</code></td>
<td><code>false</code></td>
<td>
If true, the driver pod counts the number of created on-demand persistent volume claims
and waits if the number is greater than or equal to the total number of volumes which
the Spark job is able to have. This config requires both
<code>spark.kubernetes.driver.ownPersistentVolumeClaim=true</code> and
<code>spark.kubernetes.driver.reusePersistentVolumeClaim=true</code>.
</td>
<td>3.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.disableConfigMap</code></td>
<td><code>false</code></td>
<td>
If true, disable ConfigMap creation for executors.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.driver.pod.featureSteps</code></td>
<td>(none)</td>
<td>
Comma-separated class names of extra driver pod feature steps implementing
<code>KubernetesFeatureConfigStep</code>. This is a developer API.
These run after all of Spark's internal feature steps. Since 3.3.0, your driver feature step
can implement <code>KubernetesDriverCustomFeatureConfigStep</code>, where the driver config
is also available.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.pod.featureSteps</code></td>
<td>(none)</td>
<td>
Comma-separated class names of extra executor pod feature steps implementing
<code>KubernetesFeatureConfigStep</code>. This is a developer API.
These run after all of Spark's internal feature steps. Since 3.3.0, your executor feature step
can implement <code>KubernetesExecutorCustomFeatureConfigStep</code>, where the executor config
is also available.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.maxPendingPods</code></td>
<td><code>Int.MaxValue</code></td>
<td>
Maximum number of pending PODs allowed during executor allocation for this
application. Newly requested executors which are not yet known by Kubernetes are
also counted into this limit, as they will change into pending PODs over time.
This limit is independent of the resource profiles, as it limits the sum of all
allocations across all the used resource profiles.
</td>
<td>3.2.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.pods.allocator</code></td>
<td><code>direct</code></td>
<td>
Allocator to use for pods. Possible values are <code>direct</code> (the default)
and <code>statefulset</code>, or the full class name of a class implementing <code>AbstractPodsAllocator</code>.
Future versions may add Job or ReplicaSet. This is a developer API and may change
or be removed at any time.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.executor.timeout</code></td>
<td><code>600s</code></td>
<td>
Time to wait before a newly created executor POD request, which has not yet reached
the POD pending state, is considered timed out and deleted.
</td>
<td>3.1.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.allocation.driver.readinessTimeout</code></td>
<td><code>1s</code></td>
<td>
Time to wait for the driver pod to get ready before creating executor pods. This wait
only happens on application start. If the timeout happens, executor pods will still be
created.
</td>
<td>3.1.3</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.enablePollingWithResourceVersion</code></td>
<td><code>false</code></td>
<td>
If true, <code>resourceVersion</code> is set to <code>0</code> when invoking pod listing APIs
in order to allow API Server-side caching. This should be used carefully.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.eventProcessingInterval</code></td>
<td><code>1s</code></td>
<td>
Interval between successive inspection of executor events sent from the Kubernetes API.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.rollInterval</code></td>
<td><code>0s</code></td>
<td>
Interval between executor roll operations. It is disabled by default with <code>0s</code>.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.minTasksPerExecutorBeforeRolling</code></td>
<td><code>0</code></td>
<td>
The minimum number of tasks per executor before rolling.
Spark will not roll executors whose total number of tasks is smaller
than this configuration. The default value is zero.
</td>
<td>3.3.0</td>
</tr>
<tr>
<td><code>spark.kubernetes.executor.rollPolicy</code></td>
<td><code>OUTLIER</code></td>
<td>
Executor roll policy. Valid values are ID, ADD_TIME, TOTAL_GC_TIME,
TOTAL_DURATION, AVERAGE_DURATION, FAILED_TASKS, and OUTLIER (default).
When an executor roll happens, Spark uses this policy to choose
an executor and decommission it. The built-in policies are based on the executor summary,
and newly started executors are protected by <code>spark.kubernetes.executor.minTasksPerExecutorBeforeRolling</code>.
The ID policy chooses the executor with the smallest executor ID.
The ADD_TIME policy chooses the executor with the smallest add-time.
The TOTAL_GC_TIME policy chooses the executor with the biggest total task GC time.
The TOTAL_DURATION policy chooses the executor with the biggest total task time.
The AVERAGE_DURATION policy chooses the executor with the biggest average task time.
The FAILED_TASKS policy chooses the executor with the largest number of failed tasks.
The OUTLIER policy chooses an executor with outstanding statistics, i.e. at least
two standard deviations larger than the mean in average task time,
total task time, total task GC time, or the number of failed tasks, if one exists.
If there is no outlier, it works like the TOTAL_DURATION policy.
</td>
<td>3.3.0</td>
</tr>
</table>
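<p>As a rough sketch of how several of the properties above combine in practice (the master URL, namespace, image name, application jar, and resource values are placeholders, not recommendations), a cluster-mode submission might look like:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ spark-submit \
    --master k8s://https://example-apiserver:6443 \
    --deploy-mode cluster \
    --name spark-pi \
    --class org.apache.spark.examples.SparkPi \
    --conf spark.executor.instances=3 \
    --conf spark.kubernetes.namespace=spark-apps \
    --conf spark.kubernetes.container.image=example.com/repo/spark:v1.0.0 \
    --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
    --conf spark.kubernetes.driver.request.cores=1 \
    --conf spark.kubernetes.executor.request.cores=2 \
    --conf spark.kubernetes.executor.limit.cores=2 \
    local:///opt/spark/examples/jars/spark-examples.jar
</code></pre></div></div>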
<h4 id="pod-template-properties">Pod template properties</h4>
<p>See the tables below for the full list of pod specifications that will be overwritten by Spark.</p>
<h3 id="pod-metadata">Pod Metadata</h3>
<table>
<thead><tr><th>Pod metadata key</th><th>Modified value</th><th>Description</th></tr></thead>
<tr>
<td>name</td>
<td>Value of <code>spark.kubernetes.driver.pod.name</code></td>
<td>
The driver pod name will be overwritten with either the configured or default value of
<code>spark.kubernetes.driver.pod.name</code>. The executor pod names will be unaffected.
</td>
</tr>
<tr>
<td>namespace</td>
<td>Value of <code>spark.kubernetes.namespace</code></td>
<td>
Spark makes strong assumptions about the driver and executor namespaces. Both driver and executor namespaces will
be replaced by either the configured or default spark conf value.
</td>
</tr>
<tr>
<td>labels</td>
<td>Adds the labels from <code>spark.kubernetes.{driver,executor}.label.*</code></td>
<td>
Spark will add additional labels specified by the spark configuration.
</td>
</tr>
<tr>
<td>annotations</td>
<td>Adds the annotations from <code>spark.kubernetes.{driver,executor}.annotation.*</code></td>
<td>
Spark will add additional annotations specified by the spark configuration.
</td>
</tr>
</table>
<h3 id="pod-spec">Pod Spec</h3>
<table>
<thead><tr><th>Pod spec key</th><th>Modified value</th><th>Description</th></tr></thead>
<tr>
<td>imagePullSecrets</td>
<td>Adds image pull secrets from <code>spark.kubernetes.container.image.pullSecrets</code></td>
<td>
Additional pull secrets will be added from the spark configuration to both the driver and executor pods.
</td>
</tr>
<tr>
<td>nodeSelector</td>
<td>Adds node selectors from <code>spark.kubernetes.node.selector.*</code></td>
<td>
Additional node selectors will be added from the spark configuration to both the driver and executor pods.
</td>
</tr>
<tr>
<td>restartPolicy</td>
<td><code>"never"</code></td>
<td>
Spark assumes that both drivers and executors never restart.
</td>
</tr>
<tr>
<td>serviceAccount</td>
<td>Value of <code>spark.kubernetes.authenticate.driver.serviceAccountName</code></td>
<td>
Spark will override <code>serviceAccount</code> with the value of the spark configuration for only
driver pods, and only if the spark configuration is specified. Executor pods will remain unaffected.
</td>
</tr>
<tr>
<td>serviceAccountName</td>
<td>Value of <code>spark.kubernetes.authenticate.driver.serviceAccountName</code></td>
<td>
Spark will override <code>serviceAccountName</code> with the value of the spark configuration for only
driver pods, and only if the spark configuration is specified. Executor pods will remain unaffected.
</td>
</tr>
<tr>
<td>volumes</td>
<td>Adds volumes from <code>spark.kubernetes.{driver,executor}.volumes.[VolumeType].[VolumeName].mount.path</code></td>
<td>
Spark will add volumes as specified by the spark conf, as well as additional volumes necessary for passing
spark conf and pod template files.
</td>
</tr>
</table>
<h3 id="container-spec">Container spec</h3>
<p>The following affect the driver and executor containers. All other containers in the pod spec will be unaffected.</p>
<table>
<thead><tr><th>Container spec key</th><th>Modified value</th><th>Description</th></tr></thead>
<tr>
<td>env</td>
<td>Adds env variables from <code>spark.kubernetes.driverEnv.[EnvironmentVariableName]</code></td>
<td>
Spark will add driver env variables from <code>spark.kubernetes.driverEnv.[EnvironmentVariableName]</code>, and
executor env variables from <code>spark.executorEnv.[EnvironmentVariableName]</code>.
</td>
</tr>
<tr>
<td>image</td>
<td>Value of <code>spark.kubernetes.{driver,executor}.container.image</code></td>
<td>
The image will be defined by the spark configurations.
</td>
</tr>
<tr>
<td>imagePullPolicy</td>
<td>Value of <code>spark.kubernetes.container.image.pullPolicy</code></td>
<td>
Spark will override the pull policy for both driver and executors.
</td>
</tr>
<tr>
<td>name</td>
<td>See description</td>
<td>
The container name will be assigned by spark ("spark-kubernetes-driver" for the driver container, and
"spark-kubernetes-executor" for each executor container) if not defined by the pod template. If the container is defined by the
template, the template's name will be used.
</td>
</tr>
<tr>
<td>resources</td>
<td>See description</td>
<td>
The cpu limits are set by <code>spark.kubernetes.{driver,executor}.limit.cores</code>. The cpu is set by
<code>spark.{driver,executor}.cores</code>. The memory request and limit are set by summing the values of
<code>spark.{driver,executor}.memory</code> and <code>spark.{driver,executor}.memoryOverhead</code>.
Other resource limits are set by <code>spark.{driver,executor}.resources.{resourceName}.*</code> configs.
</td>
</tr>
<tr>
<td>volumeMounts</td>
<td>Add volumes from <code>spark.kubernetes.driver.volumes.[VolumeType].[VolumeName].mount.{path,readOnly}</code></td>
<td>
Spark will add volumes as specified by the spark conf, as well as additional volumes necessary for passing
spark conf and pod template files.
</td>
</tr>
</table>
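<p>As an illustration of how the <code>env</code> and <code>resources</code> rows above are populated, the sketch below shows a hypothetical set of executor configuration values together with the container spec they would produce (the values and the <code>MY_ENV_VAR</code> name are examples only; adjust them to your workload):</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code># Example values only
--conf spark.executor.cores=2                    # container cpu request: 2
--conf spark.kubernetes.executor.limit.cores=4   # container cpu limit: 4
--conf spark.executor.memory=4g                  # memory request and limit: 4g + 1g = 5120Mi
--conf spark.executor.memoryOverhead=1g
--conf spark.executorEnv.MY_ENV_VAR=some-value   # injected into the executor container env
</code></pre></div></div>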
<h3 id="resource-allocation-and-configuration-overview">Resource Allocation and Configuration Overview</h3>
<p>Please make sure you have read the Custom Resource Scheduling and Configuration Overview section on the <a href="configuration.html">configuration page</a>. This section only covers the Kubernetes-specific aspects of resource scheduling.</p>
<p>The user is responsible for properly configuring the Kubernetes cluster so that the resources are available, and ideally for isolating each resource per container so that a resource is not shared between multiple containers. If the resource is not isolated, the user is responsible for writing a discovery script so that the resource is not shared between containers. See the Kubernetes documentation for specifics on configuring Kubernetes with <a href="https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/">custom resources</a>.</p>
<p>Spark automatically handles translating the Spark configs <code>spark.{driver/executor}.resource.{resourceType}</code> into the Kubernetes configs as long as the Kubernetes resource type follows the Kubernetes device plugin format of <code class="language-plaintext highlighter-rouge">vendor-domain/resourcetype</code>. The user must specify the vendor using the <code>spark.{driver/executor}.resource.{resourceType}.vendor</code> config. You do not need to explicitly add anything if you are using Pod templates. For reference and an example, see the Kubernetes documentation for scheduling <a href="https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/">GPUs</a>. Spark only supports setting the resource limits.</p>
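<p>For example, a job that requests one NVIDIA GPU per driver and executor might use the following options (a sketch; the exact vendor domain depends on the device plugin installed on your cluster):</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.driver.resource.gpu.amount=1
--conf spark.driver.resource.gpu.vendor=nvidia.com
--conf spark.executor.resource.gpu.amount=1
--conf spark.executor.resource.gpu.vendor=nvidia.com
# Spark translates these into a container resource limit of nvidia.com/gpu: 1
</code></pre></div></div>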
<p>Kubernetes does not tell Spark the addresses of the resources allocated to each container. For that reason, the user must specify a discovery script that is run by the executor on startup to discover what resources are available to that executor. You can find an example script in <code class="language-plaintext highlighter-rouge">examples/src/main/scripts/getGpusResources.sh</code>. The script must have execute permissions set, and the user should set up permissions to prevent malicious users from modifying it. The script should write to STDOUT a JSON string in the format of the ResourceInformation class. This has the resource name and an array of resource addresses available to just that executor.</p>
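<p>A minimal sketch of such a discovery script for NVIDIA GPUs, assuming <code>nvidia-smi</code> is available inside the container (the bundled <code>examples/src/main/scripts/getGpusResources.sh</code> takes a similar approach), could look like the following. The script would then be referenced via <code>spark.executor.resource.gpu.discoveryScript</code> and must be present and executable in the executor image:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>#!/usr/bin/env bash
# Emit a ResourceInformation-style JSON string listing the GPU indices visible
# to this container, e.g. {"name": "gpu", "addresses": ["0","1"]}
ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | sed -e 's/^/"/' -e 's/$/"/' | paste -sd, -)
echo "{\"name\": \"gpu\", \"addresses\": [${ADDRS}]}"
</code></pre></div></div>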
<h3 id="resource-level-scheduling-overview">Resource Level Scheduling Overview</h3>
<p>There are several resource level scheduling features supported by Spark on Kubernetes.</p>
<h4 id="priority-scheduling">Priority Scheduling</h4>
<p>Kubernetes supports <a href="https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption">Pod priority</a> by default.</p>
<p>Spark on Kubernetes allows defining the priority of jobs by <a href="#pod-template">Pod template</a>. The user can specify the <code>priorityClassName</code> in the driver or executor Pod template <code>spec</code> section. Below is an example showing how to specify it:</p>
<div class="language-yaml highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="na">apiVersion</span><span class="pi">:</span> <span class="s">v1</span>
<span class="na">Kind</span><span class="pi">:</span> <span class="s">Pod</span>
<span class="na">metadata</span><span class="pi">:</span>
<span class="na">labels</span><span class="pi">:</span>
<span class="na">template-label-key</span><span class="pi">:</span> <span class="s">driver-template-label-value</span>
<span class="na">spec</span><span class="pi">:</span>
<span class="c1"># Specify the priority in here</span>
<span class="na">priorityClassName</span><span class="pi">:</span> <span class="s">system-node-critical</span>
<span class="na">containers</span><span class="pi">:</span>
<span class="pi">-</span> <span class="na">name</span><span class="pi">:</span> <span class="s">test-driver-container</span>
<span class="na">image</span><span class="pi">:</span> <span class="s">will-be-overwritten</span>
</code></pre></div></div>
<h4 id="customized-kubernetes-schedulers-for-spark-on-kubernetes">Customized Kubernetes Schedulers for Spark on Kubernetes</h4>
<p>Spark allows users to specify a custom Kubernetes scheduler. A generic sketch combining the options below follows this list.</p>
<ol>
<li>
<p>Specify a scheduler name.</p>
<p>Users can specify a custom scheduler using <code>spark.kubernetes.scheduler.name</code> or
<code>spark.kubernetes.{driver/executor}.scheduler.name</code> configuration.</p>
</li>
<li>
<p>Specify scheduler related configurations.</p>
<p>To configure the custom scheduler, the user can use <a href="#pod-template">Pod templates</a>, add labels (<code>spark.kubernetes.{driver,executor}.label.*</code>) and annotations (<code>spark.kubernetes.{driver,executor}.annotation.*</code>), or use scheduler-specific configurations (such as <code>spark.kubernetes.scheduler.volcano.podGroupTemplateFile</code>).</p>
</li>
<li>
<p>Specify scheduler feature step.</p>
<p>Users may also consider using <code>spark.kubernetes.{driver/executor}.pod.featureSteps</code> to support more complex requirements, including but not limited to:</p>
<ul>
<li>Create additional Kubernetes custom resources for driver/executor scheduling.</li>
<li>Set scheduler hints according to configuration or existing Pod info dynamically.</li>
</ul>
</li>
</ol>
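<p>As a generic sketch combining the options above (the scheduler name <code>my-scheduler</code> is a placeholder for whatever scheduler is deployed on your cluster):</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code># Use the same custom scheduler for both driver and executor pods
--conf spark.kubernetes.scheduler.name=my-scheduler
# Or pick a scheduler per role
--conf spark.kubernetes.driver.scheduler.name=my-scheduler
--conf spark.kubernetes.executor.scheduler.name=my-scheduler
# Pass scheduler hints through labels or annotations, e.g. a queue label
--conf spark.kubernetes.driver.label.queue=default
--conf spark.kubernetes.executor.label.queue=default
</code></pre></div></div>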
<h4 id="using-volcano-as-customized-scheduler-for-spark-on-kubernetes">Using Volcano as Customized Scheduler for Spark on Kubernetes</h4>
<h5 id="prerequisites-1">Prerequisites</h5>
<ul>
<li>
<p>Spark on Kubernetes with <a href="https://volcano.sh/en">Volcano</a> as a custom scheduler is supported since Spark v3.3.0 and Volcano v1.7.0. Below is an example to install Volcano 1.7.0:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>kubectl apply <span class="nt">-f</span> https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml
</code></pre></div> </div>
</li>
</ul>
<h5 id="build">Build</h5>
<p>To create a Spark distribution with Volcano support, like those distributed by the Spark <a href="https://spark.apache.org/downloads.html">Downloads page</a>, see also <a href="https://spark.apache.org/docs/latest/building-spark.html">&#8220;Building Spark&#8221;</a>:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./dev/make-distribution.sh <span class="nt">--name</span> custom-spark <span class="nt">--pip</span> <span class="nt">--r</span> <span class="nt">--tgz</span> <span class="nt">-Psparkr</span> <span class="nt">-Phive</span> <span class="nt">-Phive-thriftserver</span> <span class="nt">-Pkubernetes</span> <span class="nt">-Pvolcano</span>
</code></pre></div></div>
<h5 id="usage">Usage</h5>
<p>Spark on Kubernetes allows using Volcano as a custom scheduler. Users can use Volcano to
support more advanced resource scheduling: queue scheduling, resource reservation, priority scheduling, and more.</p>
<p>To use Volcano as a custom scheduler, the user needs to specify the following configuration options:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c"># Specify volcano scheduler and PodGroup template</span>
<span class="nt">--conf</span> spark.kubernetes.scheduler.name<span class="o">=</span>volcano
<span class="nt">--conf</span> spark.kubernetes.scheduler.volcano.podGroupTemplateFile<span class="o">=</span>/path/to/podgroup-template.yaml
<span class="c"># Specify driver/executor VolcanoFeatureStep</span>
<span class="nt">--conf</span> spark.kubernetes.driver.pod.featureSteps<span class="o">=</span>org.apache.spark.deploy.k8s.features.VolcanoFeatureStep
<span class="nt">--conf</span> spark.kubernetes.executor.pod.featureSteps<span class="o">=</span>org.apache.spark.deploy.k8s.features.VolcanoFeatureStep
</code></pre></div></div>
<h5 id="volcano-feature-step">Volcano Feature Step</h5>
<p>Volcano feature steps help users create a Volcano PodGroup and set driver/executor pod annotations to link the pods with this <a href="https://volcano.sh/en/docs/podgroup/">PodGroup</a>.</p>
<p>Note that currently only driver/job level PodGroups are supported in the Volcano Feature Step.</p>
<h5 id="volcano-podgroup-template">Volcano PodGroup Template</h5>
<p>Volcano defines PodGroup spec using <a href="https://volcano.sh/en/docs/podgroup/#example">CRD yaml</a>.</p>
<p>Similar to <a href="#pod-template">Pod template</a>, Spark users can use Volcano PodGroup Template to define the PodGroup spec configurations.
To do so, specify the Spark property <code class="language-plaintext highlighter-rouge">spark.kubernetes.scheduler.volcano.podGroupTemplateFile</code> to point to a file accessible to the <code class="language-plaintext highlighter-rouge">spark-submit</code> process.
Below is an example of a PodGroup template:</p>
<div class="language-yaml highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="na">apiVersion</span><span class="pi">:</span> <span class="s">scheduling.volcano.sh/v1beta1</span>
<span class="na">kind</span><span class="pi">:</span> <span class="s">PodGroup</span>
<span class="na">spec</span><span class="pi">:</span>
<span class="c1"># Specify minMember to 1 to make a driver pod</span>
<span class="na">minMember</span><span class="pi">:</span> <span class="m">1</span>
<span class="c1"># Specify minResources to support resource reservation (the driver pod resource and executors pod resource should be considered)</span>
<span class="c1"># It is useful for ensource the available resources meet the minimum requirements of the Spark job and avoiding the</span>
<span class="c1"># situation where drivers are scheduled, and then they are unable to schedule sufficient executors to progress.</span>
<span class="na">minResources</span><span class="pi">:</span>
<span class="na">cpu</span><span class="pi">:</span> <span class="s2">"</span><span class="s">2"</span>
<span class="na">memory</span><span class="pi">:</span> <span class="s2">"</span><span class="s">3Gi"</span>
<span class="c1"># Specify the priority, help users to specify job priority in the queue during scheduling.</span>
<span class="na">priorityClassName</span><span class="pi">:</span> <span class="s">system-node-critical</span>
<span class="c1"># Specify the queue, indicates the resource queue which the job should be submitted to</span>
<span class="na">queue</span><span class="pi">:</span> <span class="s">default</span>
</code></pre></div></div>
<h4 id="using-apache-yunikorn-as-customized-scheduler-for-spark-on-kubernetes">Using Apache YuniKorn as Customized Scheduler for Spark on Kubernetes</h4>
<p><a href="https://yunikorn.apache.org/">Apache YuniKorn</a> is a resource scheduler for Kubernetes that provides advanced batch scheduling
capabilities, such as job queuing, resource fairness, min/max queue capacity and flexible job ordering policies.
For available Apache YuniKorn features, please refer to <a href="https://yunikorn.apache.org/docs/get_started/core_features">core features</a>.</p>
<h5 id="prerequisites-2">Prerequisites</h5>
<p>Install Apache YuniKorn:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>helm repo add yunikorn https://apache.github.io/yunikorn-release
helm repo update
helm <span class="nb">install </span>yunikorn yunikorn/yunikorn <span class="nt">--namespace</span> yunikorn <span class="nt">--version</span> 1.6.3 <span class="nt">--create-namespace</span> <span class="nt">--set</span> <span class="nv">embedAdmissionController</span><span class="o">=</span><span class="nb">false</span>
</code></pre></div></div>
<p>The above steps will install YuniKorn v1.6.3 on an existing Kubernetes cluster.</p>
<h5 id="get-started">Get started</h5>
<p>Submit Spark jobs with the following extra options:</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="nt">--conf</span> spark.kubernetes.scheduler.name<span class="o">=</span>yunikorn
<span class="nt">--conf</span> spark.kubernetes.driver.label.queue<span class="o">=</span>root.default
<span class="nt">--conf</span> spark.kubernetes.executor.label.queue<span class="o">=</span>root.default
<span class="nt">--conf</span> spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id<span class="o">={{</span>APP_ID<span class="o">}}</span>
<span class="nt">--conf</span> spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id<span class="o">={{</span>APP_ID<span class="o">}}</span>
</code></pre></div></div>
<p>Note that <code>{{APP_ID}}</code> is a built-in variable that will be substituted with the Spark job ID automatically.
With the above configuration, the job will be scheduled by the YuniKorn scheduler instead of the default Kubernetes scheduler.</p>
<h3 id="stage-level-scheduling-overview">Stage Level Scheduling Overview</h3>
<p>Stage level scheduling is supported on Kubernetes:</p>
<ul>
<li>When dynamic allocation is disabled: It allows users to specify different task resource requirements at the stage level and will use the same executors requested at startup.</li>
<li>When dynamic allocation is enabled: It allows users to specify task and executor resource requirements at the stage level and will request the extra executors. This also requires <code>spark.dynamicAllocation.shuffleTracking.enabled</code> to be enabled, since Kubernetes doesn&#8217;t support an external shuffle service at this time. The order in which containers for different profiles are requested from Kubernetes is not guaranteed. Because dynamic allocation on Kubernetes requires the shuffle tracking feature, executors from previous stages that used a different ResourceProfile may not idle timeout while they still hold shuffle data. This could result in using more cluster resources, and in the worst case, if there are no remaining resources on the Kubernetes cluster, Spark could potentially hang. You may consider setting <code>spark.dynamicAllocation.shuffleTracking.timeout</code> to bound this, but that could result in shuffle data having to be recomputed if it is still needed. A minimal sketch of these options follows this list.
Note that there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. If you create custom ResourceProfiles, be sure to include all necessary resources there, since the resources from the template file will not be propagated to custom ResourceProfiles.</li>
</ul>
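<p>A minimal sketch of the configuration options mentioned above for the dynamic allocation case (the timeout value is only an example, and bounding it may force shuffle data to be recomputed):</p>
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.shuffleTracking.enabled=true
# Optional: allow executors holding shuffle data to eventually time out
--conf spark.dynamicAllocation.shuffleTracking.timeout=30min
</code></pre></div></div>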
</div>
<!-- /container -->
</div>
<script src="js/vendor/jquery-3.5.1.min.js"></script>
<script src="js/vendor/bootstrap.bundle.min.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<script type="text/javascript" src="js/vendor/docsearch.min.js"></script>
<script type="text/javascript">
// DocSearch is entirely free and automated. DocSearch is built in two parts:
// 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link
// in your website and extracts content from every page it traverses. It then pushes this
// content to an Algolia index.
// 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index
// to your search input and display its results in a dropdown UI. If you want to find more
// details on how DocSearch works, check the DocSearch docs.
docsearch({
apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
appId: 'RAI69RXRSK',
indexName: 'apache_spark',
inputSelector: '#docsearch-input',
enhancedSearchInput: true,
algoliaOptions: {
'facetFilters': ["version:4.1.0-preview1"]
},
debug: false // Set debug to true if you want to inspect the dropdown
});
</script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
</script>
<script>
// Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS.
// We could use "//cdn.mathjax...", but that won't support "file://".
(function(d, script) {
script = d.createElement('script');
script.type = 'text/javascript';
script.async = true;
script.onload = function(){
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
}
});
};
script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') +
'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
'?config=TeX-AMS-MML_HTMLorMML';
d.getElementsByTagName('head')[0].appendChild(script);
}(document));
</script>
</body>
</html>