<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Debugging PySpark — PySpark 3.1.1 documentation</title>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/development/debugging.html" />
</head>
<body>

<main role="main">
| <div class="section" id="debugging-pyspark"> |
| <h1>Debugging PySpark<a class="headerlink" href="#debugging-pyspark" title="Permalink to this headline">¶</a></h1> |
<p>PySpark uses Spark as its engine and relies on <a href="https://www.py4j.org/">Py4J</a> to submit jobs to Spark and compute results.</p>
<p>On the driver side, PySpark communicates with the JVM driver via <a href="https://www.py4j.org/">Py4J</a>.
When a <a href="../reference/api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession"><code>pyspark.sql.SparkSession</code></a> or <a href="../reference/api/pyspark.SparkContext.html#pyspark.SparkContext"><code>pyspark.SparkContext</code></a> is created and initialized, PySpark launches a JVM
with which it communicates.</p>
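<p>As a quick illustration (a sketch added here, not part of the original page), you can peek at the Py4J bridge from a running session. Note that <code>_gateway</code> and <code>_jvm</code> are internal attributes shown only for demonstration, not a public API:</p>
<pre>
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# The Py4J gateway the driver uses to talk to the JVM (internal attribute).
print(sc._gateway)
# A round trip into the driver JVM via Py4J.
print(sc._jvm.java.lang.System.getProperty("java.version"))
</pre>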
<p>On the executor side, Python workers execute and handle Python-native functions and data. They are not launched if
a PySpark application does not require interaction between Python workers and JVMs; they are launched lazily, only when
Python-native functions or data have to be handled, for example when you execute pandas UDFs or
PySpark RDD APIs.</p>
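<p>For example, the following sketch (assuming an active session named <code>spark</code>) contrasts a query that stays entirely in the JVM with one that forces Python workers to launch:</p>
<pre>
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Pure DataFrame operations run in the JVM; no Python workers are launched.
spark.range(10).selectExpr("id * 2").show()

# Applying a Python lambda through the RDD API forces Python workers to start.
spark.range(10).rdd.map(lambda row: row.id * 2).collect()
</pre>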
<p>This page focuses on debugging the Python side of PySpark, on both the driver and executor sides, rather than debugging
the JVM. Profiling and debugging the JVM are described in <a href="https://spark.apache.org/developer-tools.html">Useful Developer Tools</a>.</p>
<p>Note that:</p>
<ul class="simple">
<li><p>If you are running locally, you can debug the driver side directly in your IDE without the remote debug feature. Setting up PySpark with IDEs is documented <a href="setting_ide.html#pycharm">here</a>.</p></li>
<li><p><em>There are many other ways of debugging PySpark applications</em>. For example, you can debug remotely with the open source <a href="https://www.pydev.org/manual_adv_remote_debugger.html">Remote Debugger</a> instead of the PyCharm Professional workflow documented here, or locally with Python's built-in <code>pdb</code>, as sketched below.</p></li>
</ul>
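<p>Because a locally run driver is an ordinary Python process, the standard <code>pdb</code> debugger works without any remote setup. A minimal sketch (an illustration added here, not from the original page):</p>
<pre>
import pdb

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(10)
pdb.set_trace()  # pauses here; inspect df, spark, etc. interactively
df.show()
</pre>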
| <div class="section" id="remote-debugging-pycharm-professional"> |
| <h2>Remote Debugging (PyCharm Professional)<a class="headerlink" href="#remote-debugging-pycharm-professional" title="Permalink to this headline">¶</a></h2> |
| <p>This section describes remote debugging on both driver and executor sides within a single machine to demonstrate easily. |
| The ways of debugging PySpark on the executor side is different from doing in the driver. Therefore, they will be demonstrated respectively. |
| In order to debug PySpark applications on other machines, please refer to the full instructions that are specific |
| to PyCharm, documented <a class="reference external" href="https://www.jetbrains.com/help/pycharm/remote-debugging-with-product.html">here</a>.</p> |
| <p>Firstly, choose <strong>Edit Configuration…</strong> from the <em>Run</em> menu. It opens the <strong>Run/Debug Configurations dialog</strong>. |
| You have to click <code class="docutils literal notranslate"><span class="pre">+</span></code> configuration on the toolbar, and from the list of available configurations, select <strong>Python Debug Server</strong>. |
| Enter the name of this new configuration, for example, <code class="docutils literal notranslate"><span class="pre">MyRemoteDebugger</span></code> and also specify the port number, for example <code class="docutils literal notranslate"><span class="pre">12345</span></code>.</p> |
| <img alt="PyCharm remote debugger setting" src="../_images/pyspark-remote-debug1.png" /> |
| <div class="line-block"> |
| <div class="line">After that, you should install the corresponding version of the <code class="docutils literal notranslate"><span class="pre">pydevd-pycharm</span></code> package in all the machines which will connect to your PyCharm debugger. In the previous dialog, it shows the command to install.</div> |
| </div> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>pip install pydevd-pycharm~=<version of PyCharm on the local machine> |
| </pre></div> |
| </div> |
| <div class="section" id="driver-side"> |
| <h3>Driver Side<a class="headerlink" href="#driver-side" title="Permalink to this headline">¶</a></h3> |
| <p>To debug on the driver side, your application should be able to connect to the debugging server. Copy and paste the codes |
| with <code class="docutils literal notranslate"><span class="pre">pydevd_pycharm.settrace</span></code> to the top of your PySpark script. Suppose the script name is <code class="docutils literal notranslate"><span class="pre">app.py</span></code>:</p> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">echo</span> <span class="s2">"#======================Copy and paste from the previous dialog===========================</span> |
| <span class="s2">import pydevd_pycharm</span> |
| <span class="s2">pydevd_pycharm.settrace('localhost', port=12345, stdoutToServer=True, stderrToServer=True)</span> |
| <span class="s2">#========================================================================================</span> |
| <span class="s2"># Your PySpark application codes:</span> |
| <span class="s2">from pyspark.sql import SparkSession</span> |
| <span class="s2">spark = SparkSession.builder.getOrCreate()</span> |
| <span class="s2">spark.range(10).show()"</span> > app.py |
| </pre></div> |
| </div> |
<p>Start debugging with your <code>MyRemoteDebugger</code> configuration.</p>
<img alt="PyCharm run remote debugger" src="../_images/pyspark-remote-debug2.png" />
<div class="line-block">
<div class="line">After that, submit your application. This connects to your PyCharm debugging server and lets you debug the driver side remotely.</div>
</div>
<pre>
spark-submit app.py
</pre>
</div>
| <div class="section" id="executor-side"> |
| <h3>Executor Side<a class="headerlink" href="#executor-side" title="Permalink to this headline">¶</a></h3> |
| <p>To debug on the executor side, prepare a Python file as below in your current working directory.</p> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">echo</span> <span class="s2">"from pyspark import daemon, worker</span> |
| <span class="s2">def remote_debug_wrapped(*args, **kwargs):</span> |
| <span class="s2"> #======================Copy and paste from the previous dialog===========================</span> |
| <span class="s2"> import pydevd_pycharm</span> |
| <span class="s2"> pydevd_pycharm.settrace('localhost', port=12345, stdoutToServer=True, stderrToServer=True)</span> |
| <span class="s2"> #========================================================================================</span> |
| <span class="s2"> worker.main(*args, **kwargs)</span> |
| <span class="s2">daemon.worker_main = remote_debug_wrapped</span> |
| <span class="s2">if __name__ == '__main__':</span> |
| <span class="s2"> daemon.manager()"</span> > remote_debug.py |
| </pre></div> |
| </div> |
<p>This file will be used as the Python worker in your PySpark applications via the <code>spark.python.daemon.module</code> configuration.
Run the <code>pyspark</code> shell with the configuration below:</p>
<pre>
pyspark --conf spark.python.daemon.module=remote_debug
</pre>
<p>Now you are ready to debug remotely. Start debugging with your <code>MyRemoteDebugger</code> configuration.</p>
<img alt="PyCharm run remote debugger" src="../_images/pyspark-remote-debug2.png" />
<div class="line-block">
<div class="line">After that, run a job that creates Python workers, for example, as below. Here <code>repartition(1)</code> collapses the job to a single partition so that only one Python worker attaches to the debugger.</div>
</div>
<pre>
spark.range(10).repartition(1).rdd.map(lambda x: x).collect()
</pre>
</div>
</div>
| <div class="section" id="checking-resource-usage-top-and-ps"> |
| <h2>Checking Resource Usage (<code class="docutils literal notranslate"><span class="pre">top</span></code> and <code class="docutils literal notranslate"><span class="pre">ps</span></code>)<a class="headerlink" href="#checking-resource-usage-top-and-ps" title="Permalink to this headline">¶</a></h2> |
| <p>The Python processes on the driver and executor can be checked via typical ways such as <code class="docutils literal notranslate"><span class="pre">top</span></code> and <code class="docutils literal notranslate"><span class="pre">ps</span></code> commands.</p> |
| <div class="section" id="id2"> |
| <h3>Driver Side<a class="headerlink" href="#id2" title="Permalink to this headline">¶</a></h3> |
| <p>On the driver side, you can get the process id from your PySpark shell easily as below to know the process id and resources.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">os</span><span class="p">;</span> <span class="n">os</span><span class="o">.</span><span class="n">getpid</span><span class="p">()</span> |
| <span class="go">18482</span> |
| </pre></div> |
| </div> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ps -fe <span class="m">18482</span> |
| </pre></div> |
| </div> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>UID PID PPID C STIME TTY TIME CMD |
| 000 18482 12345 0 0:00PM ttys001 0:00.00 /.../python |
| </pre></div> |
| </div> |
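<p>On Unix systems you can also query the same information from inside the driver process with the standard <code>resource</code> module (a sketch added for illustration, not from the original page):</p>
<pre>
import os
import resource

print(os.getpid())
# Peak resident set size of this driver process
# (kilobytes on Linux, bytes on macOS).
print(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
</pre>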
</div>
| <div class="section" id="id3"> |
| <h3>Executor Side<a class="headerlink" href="#id3" title="Permalink to this headline">¶</a></h3> |
| <p>To check on the executor side, you can simply <code class="docutils literal notranslate"><span class="pre">grep</span></code> them to figure out the process |
| ids and relevant resources because Python workers are forked from <code class="docutils literal notranslate"><span class="pre">pyspark.daemon</span></code>.</p> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ps -fe <span class="p">|</span> grep pyspark.daemon |
| </pre></div> |
| </div> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>000 12345 1 0 0:00PM ttys000 0:00.00 /.../python -m pyspark.daemon |
| 000 12345 1 0 0:00PM ttys000 0:00.00 /.../python -m pyspark.daemon |
| 000 12345 1 0 0:00PM ttys000 0:00.00 /.../python -m pyspark.daemon |
| 000 12345 1 0 0:00PM ttys000 0:00.00 /.../python -m pyspark.daemon |
| ... |
| </pre></div> |
| </div> |
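<p>The same check can be scripted from Python, which can be handy when collecting worker process IDs programmatically (a sketch, assuming a Unix-like system with <code>ps</code> available):</p>
<pre>
import subprocess

# Equivalent to the ps | grep pipeline above.
out = subprocess.run(["ps", "-fe"], capture_output=True, text=True).stdout
for line in out.splitlines():
    if "pyspark.daemon" in line:
        print(line)
</pre>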
</div>
</div>
| <div class="section" id="profiling-memory-usage-memory-profiler"> |
| <h2>Profiling Memory Usage (Memory Profiler)<a class="headerlink" href="#profiling-memory-usage-memory-profiler" title="Permalink to this headline">¶</a></h2> |
| <p><a class="reference external" href="https://github.com/pythonprofilers/memory_profiler">memory_profiler</a> is one of the profilers that allow you to |
| check the memory usage line by line. This method documented here <em>only works for the driver side</em>.</p> |
<p>Unless your driver program runs on another machine (e.g., YARN cluster mode), this tool makes it easy to
debug memory usage on the driver side. Suppose your PySpark script is named <code>profile_memory.py</code>;
you can profile it as below.</p>
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">echo</span> <span class="s2">"from pyspark.sql import SparkSession</span> |
| <span class="s2">#===Your function should be decorated with @profile===</span> |
| <span class="s2">from memory_profiler import profile</span> |
| <span class="s2">@profile</span> |
| <span class="s2">#=====================================================</span> |
| <span class="s2">def my_func():</span> |
| <span class="s2"> session = SparkSession.builder.getOrCreate()</span> |
| <span class="s2"> df = session.range(10000)</span> |
| <span class="s2"> return df.collect()</span> |
| <span class="s2">if __name__ == '__main__':</span> |
| <span class="s2"> my_func()"</span> > profile_memory.py |
| </pre></div> |
| </div> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python -m memory_profiler profile_memory.py |
| </pre></div> |
| </div> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>Filename: profile_memory.py |
| |
| Line # Mem usage Increment Line Contents |
| ================================================ |
| ... |
| 6 def my_func(): |
| 7 51.5 MiB 0.6 MiB session = SparkSession.builder.getOrCreate() |
| 8 51.5 MiB 0.0 MiB df = session.range(10000) |
| 9 54.4 MiB 2.8 MiB return df.collect() |
| </pre></div> |
| </div> |
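<p>For a whole-function measurement without the line-by-line decorator, memory_profiler also provides a <code>memory_usage</code> function. A sketch under the same driver-side assumptions (the function name <code>build_and_collect</code> is illustrative):</p>
<pre>
from memory_profiler import memory_usage
from pyspark.sql import SparkSession

def build_and_collect():
    session = SparkSession.builder.getOrCreate()
    return session.range(10000).collect()

# Sample this process's memory every 0.1 s while the function runs (values in MiB).
usage = memory_usage((build_and_collect, (), {}), interval=0.1)
print("peak:", max(usage), "MiB")
</pre>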
</div>
| <div class="section" id="identifying-hot-loops-python-profilers"> |
| <h2>Identifying Hot Loops (Python Profilers)<a class="headerlink" href="#identifying-hot-loops-python-profilers" title="Permalink to this headline">¶</a></h2> |
| <p><a class="reference external" href="https://docs.python.org/3/library/profile.html">Python Profilers</a> are useful built-in features in Python itself. These |
| provide deterministic profiling of Python programs with a lot of useful statistics. This section describes how to use it on |
| both driver and executor sides in order to identify expensive or hot code paths.</p> |
| <div class="section" id="id4"> |
| <h3>Driver Side<a class="headerlink" href="#id4" title="Permalink to this headline">¶</a></h3> |
| <p>To use this on driver side, you can use it as you would do for regular Python programs because PySpark on driver side is a |
| regular Python process unless you are running your driver program in another machine (e.g., YARN cluster mode).</p> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">echo</span> <span class="s2">"from pyspark.sql import SparkSession</span> |
| <span class="s2">spark = SparkSession.builder.getOrCreate()</span> |
| <span class="s2">spark.range(10).show()"</span> > app.py |
| </pre></div> |
| </div> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python -m cProfile app.py |
| </pre></div> |
| </div> |
| <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>... |
| 129215 function calls (125446 primitive calls) in 5.926 seconds |
| |
| Ordered by: standard name |
| |
| ncalls tottime percall cumtime percall filename:lineno(function) |
| 1198/405 0.001 0.000 0.083 0.000 <frozen importlib._bootstrap>:1009(_handle_fromlist) |
| 561 0.001 0.000 0.001 0.000 <frozen importlib._bootstrap>:103(release) |
| 276 0.000 0.000 0.000 0.000 <frozen importlib._bootstrap>:143(__init__) |
| 276 0.000 0.000 0.002 0.000 <frozen importlib._bootstrap>:147(__enter__) |
| ... |
| </pre></div> |
| </div> |
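<p>To surface the hottest entries first, you can also drive <code>cProfile</code> and <code>pstats</code> from Python and sort by cumulative time. A sketch (wrapping the application in a <code>main()</code> function is an assumption for illustration):</p>
<pre>
import cProfile
import pstats

from pyspark.sql import SparkSession

def main():
    spark = SparkSession.builder.getOrCreate()
    spark.range(10).show()

cProfile.run("main()", "app.prof")           # write raw stats to a file
stats = pstats.Stats("app.prof")
stats.sort_stats("cumtime").print_stats(10)  # show the ten most expensive entries
</pre>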
</div>
| <div class="section" id="id5"> |
| <h3>Executor Side<a class="headerlink" href="#id5" title="Permalink to this headline">¶</a></h3> |
| <p>To use this on executor side, PySpark provides remote <a class="reference external" href="https://docs.python.org/3/library/profile.html">Python Profilers</a> for |
| executor side, which can be enabled by setting <code class="docutils literal notranslate"><span class="pre">spark.python.profile</span></code> configuration to <code class="docutils literal notranslate"><span class="pre">true</span></code>.</p> |
| <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pyspark --conf spark.python.profile<span class="o">=</span><span class="nb">true</span> |
| </pre></div> |
| </div> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">100</span><span class="p">))</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">rdd</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> |
| <span class="go">100</span> |
| <span class="gp">>>> </span><span class="n">sc</span><span class="o">.</span><span class="n">show_profiles</span><span class="p">()</span> |
| <span class="go">============================================================</span> |
| <span class="go">Profile of RDD<id=1></span> |
| <span class="go">============================================================</span> |
| <span class="go"> 728 function calls (692 primitive calls) in 0.004 seconds</span> |
| |
| <span class="go"> Ordered by: internal time, cumulative time</span> |
| |
| <span class="go"> ncalls tottime percall cumtime percall filename:lineno(function)</span> |
| <span class="go"> 12 0.001 0.000 0.001 0.000 serializers.py:210(load_stream)</span> |
| <span class="go"> 12 0.000 0.000 0.000 0.000 {built-in method _pickle.dumps}</span> |
| <span class="go"> 12 0.000 0.000 0.001 0.000 serializers.py:252(dump_stream)</span> |
| <span class="go"> 12 0.000 0.000 0.001 0.000 context.py:506(f)</span> |
| <span class="gp">...</span> |
| </pre></div> |
| </div> |
<p>This feature is supported only with RDD APIs.</p>
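<p>Instead of printing to the console, the collected profiles can be written to a directory with <code>SparkContext.dump_profiles</code>, or dumped automatically on driver exit by setting the <code>spark.python.profile.dump</code> configuration to a directory path. The path below is illustrative:</p>
<pre>
# In the same pyspark shell started with spark.python.profile=true;
# writes one profile file per RDD id.
sc.dump_profiles("/tmp/pyspark_profiles")
</pre>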
</div>
</div>
</div>
</main>
</body>
</html>