blob: 7cfb8232a6e1b741287199c84081bb4ae5b4199e [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Getting Started &#8212; PySpark 3.5.2 documentation</title>
<link href="../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" href="../_static/styles/pydata-sphinx-theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/getting_started/index.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Installation" href="install.html" />
<link rel="prev" title="PySpark Overview" href="../index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="None">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl">
<div id="navbar-start">
<a class="navbar-brand" href="../index.html">
<img src="../_static/spark-logo-reverse.png" class="logo" alt="logo">
</a>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-collapsible" aria-controls="navbar-collapsible" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse">
<div id="navbar-center" class="mr-auto">
<div class="navbar-center-item">
<ul id="navbar-main-elements" class="navbar-nav">
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../index.html">
Overview
</a>
</li>
<li class="toctree-l1 current active nav-item">
<a class="current reference internal nav-link" href="#">
Getting Started
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../user_guide/index.html">
User Guides
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../reference/index.html">
API Reference
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../development/index.html">
Development
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</div>
</div>
<div id="navbar-end">
<div class="navbar-end-item">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
3.5.2
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs-homepage URL for one version entry.
//
// entry: object with a "version" property; that value is substituted for the
//        "{version}" placeholder in the URL pattern.
// Returns the resulting URL string.
function buildURL(entry) {
  // URL pattern supplied by jinja
  const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return urlPattern.replace("{version}", entry.version);
}
// Click handler for the version-switcher links.
//
// Checks (with a HEAD request) whether the page corresponding to the current
// one exists in the selected docs version. If it does, navigate there;
// otherwise navigate to the homepage of the selected docs version.
//
// event: click event on a version link whose "href" attribute holds the
//        homepage URL of the selected docs version.
// Returns false so the browser does not follow the link itself; navigation
// is performed by the AJAX callbacks below.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "getting_started/index.html";
  // currentTarget is the link the handler is bound to; fall back to target
  // for callers that only provide the latter.
  const link = event.currentTarget || event.target;
  const otherDocsHomepage = link.getAttribute("href");
  // The homepage href assigned to these links ends in "index.html", so the
  // page path must be appended to its directory, not to the file name
  // (".../index.htmlgetting_started/index.html" never exists).
  const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, "");
  const tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function () {
      location.href = tryUrl;
    }
  }).fail(function () {
    // page is missing in the other version: go to its homepage instead
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown (runs immediately on script load).
(function () {
  // fetch the JSON config listing the available docs versions
  $.getJSON("https://spark.apache.org/static/versions.json", function (versions) {
    const dropdown = $("#version_switcher");
    // Append one link per entry, in the order given by the JSON, before any
    // per-page existence check runs; each link initially points at the
    // homepage of its docs version.
    $.each(versions, function (_, entry) {
      // entries without a custom name (e.g., "latest") show their version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // build the homepage URL and create the dropdown link for it
      entry.url = buildURL(entry);
      const link = document.createElement("a");
      link.className = "list-group-item list-group-item-action py-1";
      link.setAttribute("href", `${entry.url}`);
      link.textContent = `${entry.name}`;
      // on click, try the matching page in that version first
      link.onclick = checkPageExistsAndRedirect;
      dropdown.append(link);
    });
  });
})();
</script>
</div>
</div>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items"><form class="bd-search d-flex align-items-center" action="../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="install.html">
Installation
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="quickstart_df.html">
Quickstart: DataFrame
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="quickstart_connect.html">
Quickstart: Spark Connect
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="quickstart_ps.html">
Quickstart: Pandas API on Spark
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="testing_pyspark.html">
Testing PySpark
</a>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<nav id="bd-toc-nav">
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="getting-started">
<h1>Getting Started<a class="headerlink" href="#getting-started" title="Permalink to this headline">¶</a></h1>
<p>This page summarizes the basic steps required to set up and get started with PySpark.
There are more guides shared with other languages such as
<a class="reference external" href="https://spark.apache.org/docs/latest/quick-start.html">Quick Start</a> in Programming Guides
at <a class="reference external" href="https://spark.apache.org/docs/latest/index.html#where-to-go-from-here">the Spark documentation</a>.</p>
<p>There are live notebooks where you can try PySpark out without any other step:</p>
<ul class="simple">
<li><p><a class="reference external" href="https://mybinder.org/v2/gh/apache/spark/bb7846dd487?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_df.ipynb">Live Notebook: DataFrame</a></p></li>
<li><p><a class="reference external" href="https://mybinder.org/v2/gh/apache/spark/bb7846dd487?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_connect.ipynb">Live Notebook: Spark Connect</a></p></li>
<li><p><a class="reference external" href="https://mybinder.org/v2/gh/apache/spark/bb7846dd487?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_ps.ipynb">Live Notebook: pandas API on Spark</a></p></li>
</ul>
<p>The list below is the contents of this quickstart page:</p>
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a><ul>
<li class="toctree-l2"><a class="reference internal" href="install.html#python-versions-supported">Python Versions Supported</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html#using-pypi">Using PyPI</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html#using-conda">Using Conda</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html#manually-downloading">Manually Downloading</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html#installing-from-source">Installing from Source</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html#dependencies">Dependencies</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="quickstart_df.html">Quickstart: DataFrame</a><ul>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#DataFrame-Creation">DataFrame Creation</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Viewing-Data">Viewing Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Selecting-and-Accessing-Data">Selecting and Accessing Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Applying-a-Function">Applying a Function</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Grouping-Data">Grouping Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Getting-Data-In/Out">Getting Data In/Out</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_df.html#Working-with-SQL">Working with SQL</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="quickstart_connect.html">Quickstart: Spark Connect</a><ul>
<li class="toctree-l2"><a class="reference internal" href="quickstart_connect.html#Launch-Spark-server-with-Spark-Connect">Launch Spark server with Spark Connect</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_connect.html#Connect-to-Spark-Connect-server">Connect to Spark Connect server</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_connect.html#Create-DataFrame">Create DataFrame</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="quickstart_ps.html">Quickstart: Pandas API on Spark</a><ul>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Object-Creation">Object Creation</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Missing-Data">Missing Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Operations">Operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Grouping">Grouping</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Plotting">Plotting</a></li>
<li class="toctree-l2"><a class="reference internal" href="quickstart_ps.html#Getting-data-in/out">Getting data in/out</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="testing_pyspark.html">Testing PySpark</a><ul>
<li class="toctree-l2"><a class="reference internal" href="testing_pyspark.html#Build-a-PySpark-Application">Build a PySpark Application</a></li>
<li class="toctree-l2"><a class="reference internal" href="testing_pyspark.html#Testing-your-PySpark-Application">Testing your PySpark Application</a></li>
<li class="toctree-l2"><a class="reference internal" href="testing_pyspark.html#Putting-It-All-Together!">Putting It All Together!</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
<a class='left-prev' id="prev-link" href="../index.html" title="previous page">
<i class="fas fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">PySpark Overview</p>
</div>
</a>
<a class='right-next' id="next-link" href="install.html" title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Installation</p>
</div>
<i class="fas fa-angle-right"></i>
</a>
</div>
</main>
</div>
</div>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright .<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>