<!-- blob: 6985b0bb0de89d448e45f19bde7414b5e42ffce3 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>pyspark.pandas.DataFrame.iloc &#8212; PySpark 3.4.3 documentation</title>
<link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.iloc.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="pyspark.pandas.DataFrame.items" href="pyspark.pandas.DataFrame.items.html" />
<link rel="prev" title="pyspark.pandas.DataFrame.loc" href="pyspark.pandas.DataFrame.loc.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="Apache Spark logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../../index.html">Overview</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../user_guide/index.html">User Guides</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../../migration_guide/index.html">Migration Guides</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="active">
<a href="../index.html">Pandas API on Spark</a>
<ul>
<li class="">
<a href="../io.html">Input/Output</a>
</li>
<li class="">
<a href="../general_functions.html">General functions</a>
</li>
<li class="">
<a href="../series.html">Series</a>
</li>
<li class="active">
<a href="../frame.html">DataFrame</a>
</li>
<li class="">
<a href="../indexing.html">Index objects</a>
</li>
<li class="">
<a href="../window.html">Window</a>
</li>
<li class="">
<a href="../groupby.html">GroupBy</a>
</li>
<li class="">
<a href="../resampling.html">Resampling</a>
</li>
<li class="">
<a href="../ml.html">Machine Learning utilities</a>
</li>
<li class="">
<a href="../extensions.html">Extensions</a>
</li>
</ul>
</li>
<li class="">
<a href="../../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../../pyspark.streaming.html">Spark Streaming (Legacy)</a>
</li>
<li class="">
<a href="../../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../../pyspark.resource.html">Resource Management</a>
</li>
<li class="">
<a href="../../pyspark.errors.html">Errors</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="pyspark-pandas-dataframe-iloc">
<h1>pyspark.pandas.DataFrame.iloc<a class="headerlink" href="#pyspark-pandas-dataframe-iloc" title="Permalink to this headline">¶</a></h1>
<dl class="py method">
<dt id="pyspark.pandas.DataFrame.iloc">
<em class="property">property </em><code class="sig-prename descclassname">DataFrame.</code><code class="sig-name descname">iloc</code><a class="headerlink" href="#pyspark.pandas.DataFrame.iloc" title="Permalink to this definition">¶</a></dt>
<dd><p>Purely integer-location based indexing for selection by position.</p>
<p><code class="docutils literal notranslate"><span class="pre">.iloc[]</span></code> is primarily integer position based (from <code class="docutils literal notranslate"><span class="pre">0</span></code> to
<code class="docutils literal notranslate"><span class="pre">length-1</span></code> of the axis), but may also be used with a conditional boolean Series.</p>
<p>Allowed inputs are:</p>
<ul class="simple">
<li><p>An integer for column selection, e.g. <code class="docutils literal notranslate"><span class="pre">5</span></code>.</p></li>
<li><p>A list or array of integers for row selection with distinct index values,
e.g. <code class="docutils literal notranslate"><span class="pre">[3,</span> <span class="pre">4,</span> <span class="pre">0]</span></code>.</p></li>
<li><p>A list or array of integers for column selection, e.g. <code class="docutils literal notranslate"><span class="pre">[4,</span> <span class="pre">3,</span> <span class="pre">0]</span></code>.</p></li>
<li><p>A boolean array for column selection.</p></li>
<li><p>A slice object with ints for row and column selection, e.g. <code class="docutils literal notranslate"><span class="pre">1:7</span></code>.</p></li>
</ul>
<p>Not allowed inputs which pandas allows are:</p>
<ul class="simple">
<li><p>A list or array of integers for row selection with duplicated indexes,
e.g. <code class="docutils literal notranslate"><span class="pre">[4,</span> <span class="pre">4,</span> <span class="pre">0]</span></code>.</p></li>
<li><p>A boolean array for row selection.</p></li>
<li><p>A <code class="docutils literal notranslate"><span class="pre">callable</span></code> function with one argument (the calling Series or
DataFrame) that returns valid output for indexing (one of the above).
This is useful in method chains when you don’t have a reference to the
calling object but would like to base your selection on some value.</p></li>
</ul>
<p><code class="docutils literal notranslate"><span class="pre">.iloc</span></code> will raise <code class="docutils literal notranslate"><span class="pre">IndexError</span></code> if a requested indexer is
out-of-bounds, except <em>slice</em> indexers which allow out-of-bounds
indexing (this conforms with Python/NumPy <em>slice</em> semantics).</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<dl class="simple">
<dt><a class="reference internal" href="pyspark.pandas.DataFrame.loc.html#pyspark.pandas.DataFrame.loc" title="pyspark.pandas.DataFrame.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame.loc</span></code></a></dt><dd><p>Purely label-location based indexer for selection by label.</p>
</dd>
<dt><a class="reference internal" href="pyspark.pandas.Series.iloc.html#pyspark.pandas.Series.iloc" title="pyspark.pandas.Series.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Series.iloc</span></code></a></dt><dd><p>Purely integer-location based indexing for selection by position.</p>
</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">mydict</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">200</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">300</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">400</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2000</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3000</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4000</span> <span class="p">}]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">mydict</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
</pre></div>
</div>
<p><strong>Indexing just the rows</strong></p>
<p>A scalar integer for row selection.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="go">a 100</span>
<span class="go">b 200</span>
<span class="go">c 300</span>
<span class="go">d 400</span>
<span class="go">Name: 1, dtype: int64</span>
</pre></div>
</div>
<p>With a list of integers.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
</pre></div>
</div>
<p>With a <cite>slice</cite> object.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
</pre></div>
</div>
<p><strong>Indexing both axes</strong></p>
<p>You can mix the indexer types for the index and columns. Use <code class="docutils literal notranslate"><span class="pre">:</span></code> to
select the entire axis.</p>
<p>With a slice for the rows and a scalar integer for the column.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="go">0 2</span>
<span class="go">Name: b, dtype: int64</span>
</pre></div>
</div>
<p>With lists of integers.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">2</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">]]</span>
<span class="go"> b d</span>
<span class="go">0 2 4</span>
<span class="go">1 200 400</span>
</pre></div>
</div>
<p>With <cite>slice</cite> objects.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c</span>
<span class="go">0 1 2 3</span>
<span class="go">1 100 200 300</span>
</pre></div>
</div>
<p>With a boolean array whose length matches the columns.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]]</span>
<span class="go"> a c</span>
<span class="go">0 1 3</span>
<span class="go">1 100 300</span>
<span class="go">2 1000 3000</span>
</pre></div>
</div>
<p><strong>Setting values</strong></p>
<p>Setting value for all items matching the lists of integer positions.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">]]</span> <span class="o">=</span> <span class="mi">50</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 50 300 400</span>
<span class="go">2 1000 50 3000 4000</span>
</pre></div>
</div>
<p>Setting value for an entire row.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 10 10 10 10</span>
<span class="go">1 100 50 300 400</span>
<span class="go">2 1000 50 3000 4000</span>
</pre></div>
</div>
<p>Setting value for an entire column.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="mi">30</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 10 10 30 10</span>
<span class="go">1 100 50 30 400</span>
<span class="go">2 1000 50 30 4000</span>
</pre></div>
</div>
<p>Setting value for an entire list of columns.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]]</span> <span class="o">=</span> <span class="mi">100</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 10 10 100 100</span>
<span class="go">1 100 50 100 100</span>
<span class="go">2 1000 50 100 100</span>
</pre></div>
</div>
<p>Setting value with a Series.</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">3</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">3</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 10 10 100 200</span>
<span class="go">1 100 50 100 200</span>
<span class="go">2 1000 50 100 200</span>
</pre></div>
</div>
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.pandas.DataFrame.loc.html" title="previous page">pyspark.pandas.DataFrame.loc</a>
<a class='right-next' id="next-link" href="pyspark.pandas.DataFrame.items.html" title="next page">pyspark.pandas.DataFrame.items</a>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>