Source code for pyspark.sql.column

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# mypy: disable-error-code="empty-body"

import sys
from typing import (
    overload,
    Any,
    TYPE_CHECKING,
    Union,
)

from pyspark.sql.utils import dispatch_col_method
from pyspark.sql.types import DataType
from pyspark.errors import PySparkValueError

if TYPE_CHECKING:
    from py4j.java_gateway import JavaObject
    from pyspark.sql._typing import LiteralType, DecimalLiteral, DateTimeLiteral
    from pyspark.sql.window import WindowSpec

__all__ = ["Column"]


class Column:
    """
    A column in a DataFrame.

    .. versionadded:: 1.3.0

    .. versionchanged:: 3.4.0
        Supports Spark Connect.

    Examples
    --------
    Column instances can be created by

    >>> df = spark.createDataFrame(
    ...     [(2, "Alice"), (5, "Bob")], ["age", "name"])

    Select a column out of a DataFrame

    >>> df.name
    Column<'name'>
    >>> df["name"]
    Column<'name'>

    Create from an expression

    >>> df.age + 1
    Column<...>
    >>> 1 / df.age
    Column<...>
    """

    # HACK ALERT!! this is to reduce the backward compatibility concern, and returns
    # Spark Classic Column by default. This is NOT an API, and NOT supposed to
    # be directly invoked. DO NOT use this constructor.
    def __new__(
        cls,
        jc: "JavaObject",
    ) -> "Column":
        from pyspark.sql.classic.column import Column

        return Column.__new__(Column, jc)

    def __init__(self, jc: "JavaObject") -> None:
        self._jc = jc

    # arithmetic operators
    @dispatch_col_method
    def __neg__(self) -> "Column":
        ...

    @dispatch_col_method
    def __add__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __sub__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __mul__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __div__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __truediv__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __mod__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __radd__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rsub__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rmul__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rdiv__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rtruediv__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rmod__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __pow__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __rpow__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...
| <span class="c1"># logistic operators</span> |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__eq__</span><span class="p">(</span> <span class="c1"># type: ignore[override]</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">],</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""binary function"""</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__ne__</span><span class="p">(</span> <span class="c1"># type: ignore[override]</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""binary function"""</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__lt__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__le__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__ge__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__gt__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |

    @dispatch_col_method
    def eqNullSafe(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        """
        Equality test that is safe for null values.

        .. versionadded:: 2.3.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        other
            a value or :class:`Column`

        Examples
        --------
        >>> from pyspark.sql import Row
        >>> df1 = spark.createDataFrame([
        ...     Row(id=1, value='foo'),
        ...     Row(id=2, value=None)
        ... ])
        >>> df1.select(
        ...     df1['value'] == 'foo',
        ...     df1['value'].eqNullSafe('foo'),
        ...     df1['value'].eqNullSafe(None)
        ... ).show()
        +-------------+---------------+----------------+
        |(value = foo)|(value <=> foo)|(value <=> NULL)|
        +-------------+---------------+----------------+
        |         true|           true|           false|
        |         NULL|          false|            true|
        +-------------+---------------+----------------+
        >>> df2 = spark.createDataFrame([
        ...     Row(value = 'bar'),
        ...     Row(value = None)
        ... ])
        >>> df1.join(df2, df1["value"] == df2["value"]).count()
        0
        >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count()
        1
        >>> df2 = spark.createDataFrame([
        ...     Row(id=1, value=float('NaN')),
        ...     Row(id=2, value=42.0),
        ...     Row(id=3, value=None)
        ... ])
        >>> df2.select(
        ...     df2['value'].eqNullSafe(None),
        ...     df2['value'].eqNullSafe(float('NaN')),
        ...     df2['value'].eqNullSafe(42.0)
        ... ).show()
        +----------------+---------------+----------------+
        |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)|
        +----------------+---------------+----------------+
        |           false|           true|           false|
        |           false|          false|            true|
        |            true|          false|           false|
        +----------------+---------------+----------------+

        Notes
        -----
        Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the
        `NaN Semantics <https://spark.apache.org/docs/latest/sql-ref-datatypes.html#nan-semantics>`_
        for details.
        """
        ...

    # `and`, `or`, `not` cannot be overloaded in Python,
    # so use bitwise operators as boolean operators
    @dispatch_col_method
    def __and__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __or__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __invert__(self) -> "Column":
        ...

    @dispatch_col_method
    def __rand__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    @dispatch_col_method
    def __ror__(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        ...

    # container operators
    @dispatch_col_method
    def __contains__(self, item: Any) -> None:
        raise PySparkValueError(
            error_class="CANNOT_APPLY_IN_FOR_COLUMN",
            message_parameters={},
        )

    # bitwise operators
    @dispatch_col_method
    def bitwiseOR(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        """
        Compute bitwise OR of this expression with another expression.

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        other
            a value or :class:`Column` to calculate bitwise or(|) with
            this :class:`Column`.

        Examples
        --------
        >>> from pyspark.sql import Row
        >>> df = spark.createDataFrame([Row(a=170, b=75)])
        >>> df.select(df.a.bitwiseOR(df.b)).collect()
        [Row((a | b)=235)]
        """
        ...

    @dispatch_col_method
    def bitwiseAND(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        """
        Compute bitwise AND of this expression with another expression.

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        other
            a value or :class:`Column` to calculate bitwise and(&) with
            this :class:`Column`.

        Examples
        --------
        >>> from pyspark.sql import Row
        >>> df = spark.createDataFrame([Row(a=170, b=75)])
        >>> df.select(df.a.bitwiseAND(df.b)).collect()
        [Row((a & b)=10)]
        """
        ...

    @dispatch_col_method
    def bitwiseXOR(
        self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]
    ) -> "Column":
        """
        Compute bitwise XOR of this expression with another expression.

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        other
            a value or :class:`Column` to calculate bitwise xor(^) with
            this :class:`Column`.

        Examples
        --------
        >>> from pyspark.sql import Row
        >>> df = spark.createDataFrame([Row(a=170, b=75)])
        >>> df.select(df.a.bitwiseXOR(df.b)).collect()
        [Row((a ^ b)=225)]
        """
        ...

    @dispatch_col_method
    def getItem(self, key: Any) -> "Column":
        """
        An expression that gets an item at position ``ordinal`` out of a list,
        or gets an item by key out of a dict.

        .. versionadded:: 1.3.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        key
            a literal value, or a :class:`Column` expression.
            The position to get from a list, or the key to look up in a dict.

            .. deprecated:: 3.0.0
                :class:`Column` as a parameter is deprecated.

        Returns
        -------
        :class:`Column`
            Column representing the item at the given position in a list,
            or the value for the given key in a dict.

        Examples
        --------
        >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])
        >>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
        +----+------+
        |l[0]|d[key]|
        +----+------+
        |   1| value|
        +----+------+
        """
        ...

    @dispatch_col_method
    def getField(self, name: Any) -> "Column":
        """
        An expression that gets a field by name in a :class:`StructType`.

        .. versionadded:: 1.3.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        name
            a literal value, or a :class:`Column` expression.
            The name of the field to extract.

            .. deprecated:: 3.0.0
                :class:`Column` as a parameter is deprecated.

        Returns
        -------
        :class:`Column`
            Column representing the field of the struct extracted by name.

        Examples
        --------
        >>> from pyspark.sql import Row
        >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))])
        >>> df.select(df.r.getField("b")).show()
        +---+
        |r.b|
        +---+
        |  b|
        +---+
        >>> df.select(df.r.a).show()
        +---+
        |r.a|
        +---+
        |  1|
        +---+
        """
        ...
| |
| <div class="viewcode-block" id="Column.withField"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.withField.html#pyspark.sql.Column.withField">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">withField</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fieldName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that adds/replaces a field in :class:`StructType` by name.</span> |
| |
| <span class="sd"> .. versionadded:: 3.1.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> fieldName : str</span> |
| <span class="sd"> a literal value.</span> |
| <span class="sd"> The result will only be true at a location if any field matches in the Column.</span> |
| <span class="sd"> col : :class:`Column`</span> |
| <span class="sd"> A :class:`Column` expression for the column with `fieldName`.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column</span> |
| <span class="sd"> which field was added/replaced by fieldName.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> from pyspark.sql.functions import lit</span> |
| <span class="sd"> >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))])</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].withField('b', lit(3))).select('a.b').show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | b|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | 3|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].withField('d', lit(4))).select('a.d').show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | d|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | 4|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.dropFields"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.dropFields.html#pyspark.sql.Column.dropFields">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">dropFields</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">fieldNames</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that drops fields in :class:`StructType` by name.</span> |
| <span class="sd"> This is a no-op if the schema doesn't contain field name(s).</span> |
| |
| <span class="sd"> .. versionadded:: 3.1.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> fieldNames : str</span> |
| <span class="sd"> Desired field names (collects all positional arguments passed)</span> |
| <span class="sd"> The result will drop at a location if any field matches in the Column.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column with field dropped by fieldName.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> from pyspark.sql.functions import col, lit</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... Row(a=Row(b=1, c=2, d=3, e=Row(f=4, g=5, h=6)))])</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].dropFields('b')).show()</span> |
| <span class="sd"> +-----------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +-----------------+</span> |
| <span class="sd"> |{2, 3, {4, 5, 6}}|</span> |
| <span class="sd"> +-----------------+</span> |
| |
| <span class="sd"> >>> df.withColumn('a', df['a'].dropFields('b', 'c')).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{3, {4, 5, 6}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> This method supports dropping multiple nested fields directly e.g.</span> |
| |
| <span class="sd"> >>> df.withColumn("a", col("a").dropFields("e.g", "e.h")).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{1, 2, 3, {4}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> However, if you are going to add/replace multiple nested fields,</span> |
| <span class="sd"> it is preferred to extract out the nested struct before</span> |
| <span class="sd"> adding/replacing multiple fields e.g.</span> |
| |
| <span class="sd"> >>> df.select(col("a").withField(</span> |
| <span class="sd"> ... "e", col("a.e").dropFields("g", "h")).alias("a")</span> |
| <span class="sd"> ... ).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{1, 2, 3, {4}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.__getattr__"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.__getattr__.html#pyspark.sql.Column.__getattr__">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets an item at position ``ordinal`` out of a list,</span> |
| <span class="sd"> or gets an item by key out of a dict.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> item</span> |
| <span class="sd"> a literal value.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the item got by key out of a dict.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])</span> |
| <span class="sd"> >>> df.select(df.d.key).show()</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> |d[key]|</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> | value|</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.__getitem__"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.__getitem__.html#pyspark.sql.Column.__getitem__">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets an item at position ``ordinal`` out of a list,</span> |
| <span class="sd"> or gets an item by key out of a dict.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> k</span> |
| <span class="sd"> a literal value, or a slice object without step.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the item got by key out of a dict, or substrings sliced by</span> |
| <span class="sd"> the given slice object.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])</span> |
| <span class="sd"> >>> df.select(df.l[slice(1, 3)], df.d['key']).show()</span> |
| <span class="sd"> +---------------+------+</span> |
| <span class="sd"> |substr(l, 1, 3)|d[key]|</span> |
| <span class="sd"> +---------------+------+</span> |
| <span class="sd"> | abc| value|</span> |
| <span class="sd"> +---------------+------+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="c1"># string methods</span> |
| <div class="viewcode-block" id="Column.contains"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.contains.html#pyspark.sql.Column.contains">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">contains</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Contains the other element. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other</span> |
| <span class="sd"> string in line. A value as a literal or a :class:`Column`.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.contains('o')).collect()</span> |
| <span class="sd"> [Row(age=5, name='Bob')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.startswith"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.startswith.html#pyspark.sql.Column.startswith">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">startswith</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> String starts with. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : :class:`Column` or str</span> |
| <span class="sd"> string at start of line (do not use a regex `^`)</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.startswith('Al')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> >>> df.filter(df.name.startswith('^Al')).collect()</span> |
| <span class="sd"> []</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.endswith"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.endswith.html#pyspark.sql.Column.endswith">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">endswith</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> String ends with. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : :class:`Column` or str</span> |
| <span class="sd"> string at end of line (do not use a regex `$`)</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.endswith('ice')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> >>> df.filter(df.name.endswith('ice$')).collect()</span> |
| <span class="sd"> []</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.like"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.like.html#pyspark.sql.Column.like">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">like</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> a SQL LIKE pattern</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.Column.rlike</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by SQL LIKE pattern.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.like('Al%')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.rlike"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.rlike.html#pyspark.sql.Column.rlike">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">rlike</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex</span> |
| <span class="sd"> match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> an extended regex expression</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by extended regex expression.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.rlike('ice$')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.ilike"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.ilike.html#pyspark.sql.Column.ilike">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">ilike</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL ILIKE expression (case insensitive LIKE). Returns a boolean :class:`Column`</span> |
| <span class="sd"> based on a case insensitive match.</span> |
| |
| <span class="sd"> .. versionadded:: 3.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> a SQL LIKE pattern</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.Column.rlike</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by SQL LIKE pattern.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.ilike('%Ice')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">length</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@overload</span> |
| <span class="k">def</span> <span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">length</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <div class="viewcode-block" id="Column.substr"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.substr.html#pyspark.sql.Column.substr">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">"Column"</span><span class="p">],</span> <span class="n">length</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">"Column"</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return a :class:`Column` which is a substring of the column.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> startPos : :class:`Column` or int</span> |
| <span class="sd"> start position</span> |
| <span class="sd"> length : :class:`Column` or int</span> |
| <span class="sd"> length of the substring</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is substr of origin Column.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| |
| <span class="sd"> Example 1. Using integers for the input arguments.</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name.substr(1, 3).alias("col")).collect()</span> |
| <span class="sd"> [Row(col='Ali'), Row(col='Bob')]</span> |
| |
| <span class="sd"> Example 2. Using columns for the input arguments.</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(3, 4, "Alice"), (2, 3, "Bob")], ["sidx", "eidx", "name"])</span> |
| <span class="sd"> >>> df.select(df.name.substr(df.sidx, df.eidx).alias("col")).collect()</span> |
| <span class="sd"> [Row(col='ice'), Row(col='ob')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.isin"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.isin.html#pyspark.sql.Column.isin">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">isin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> A boolean expression that is evaluated to true if the value of this</span> |
| <span class="sd"> expression is contained by the evaluated values of the arguments.</span> |
| |
| <span class="sd"> .. versionadded:: 1.5.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> cols : Any</span> |
| <span class="sd"> The values to compare with the column values. The result will only be true at a location</span> |
| <span class="sd"> if any value matches in the Column.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element in the Column is contained in cols.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob"), (8, "Mike")], ["age", "name"])</span> |
| |
| <span class="sd"> Example 1: Filter rows with names in the specified values</span> |
| |
| <span class="sd"> >>> df[df.name.isin("Bob", "Mike")].show()</span> |
| <span class="sd"> +---+----+</span> |
| <span class="sd"> |age|name|</span> |
| <span class="sd"> +---+----+</span> |
| <span class="sd"> | 5| Bob|</span> |
| <span class="sd"> | 8|Mike|</span> |
| <span class="sd"> +---+----+</span> |
| |
| <span class="sd"> Example 2: Filter rows with ages in the specified list</span> |
| |
| <span class="sd"> >>> df[df.age.isin([1, 2, 3])].show()</span> |
| <span class="sd"> +---+-----+</span> |
| <span class="sd"> |age| name|</span> |
| <span class="sd"> +---+-----+</span> |
| <span class="sd"> | 2|Alice|</span> |
| <span class="sd"> +---+-----+</span> |
| |
| <span class="sd"> Example 3: Filter rows with names not in the specified values</span> |
| |
| <span class="sd"> >>> df[~df.name.isin("Alice", "Bob")].show()</span> |
| <span class="sd"> +---+----+</span> |
| <span class="sd"> |age|name|</span> |
| <span class="sd"> +---+----+</span> |
| <span class="sd"> | 8|Mike|</span> |
| <span class="sd"> +---+----+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <span class="c1"># order</span> |
| <div class="viewcode-block" id="Column.asc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.asc.html#pyspark.sql.Column.asc">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">asc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on the ascending order of the column.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.asc()).collect()</span> |
| <span class="sd"> [Row(name='Alice'), Row(name='Tom')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.asc_nulls_first"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.asc_nulls_first.html#pyspark.sql.Column.asc_nulls_first">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">asc_nulls_first</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on ascending order of the column, and null values</span> |
| <span class="sd"> return before non-null values.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()</span> |
| <span class="sd"> [Row(name=None), Row(name='Alice'), Row(name='Tom')]</span> |
| |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.asc_nulls_last"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.asc_nulls_last.html#pyspark.sql.Column.asc_nulls_last">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">asc_nulls_last</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on ascending order of the column, and null values</span> |
| <span class="sd"> appear after non-null values.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()</span> |
| <span class="sd"> [Row(name='Alice'), Row(name='Tom'), Row(name=None)]</span> |
| |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.desc"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.desc.html#pyspark.sql.Column.desc">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">desc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on the descending order of the column.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.desc()).collect()</span> |
| <span class="sd"> [Row(name='Tom'), Row(name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.desc_nulls_first"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.desc_nulls_first.html#pyspark.sql.Column.desc_nulls_first">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">desc_nulls_first</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on the descending order of the column, and null values</span> |
| <span class="sd"> appear before non-null values.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()</span> |
| <span class="sd"> [Row(name=None), Row(name='Tom'), Row(name='Alice')]</span> |
| |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.desc_nulls_last"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.desc_nulls_last.html#pyspark.sql.Column.desc_nulls_last">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">desc_nulls_last</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns a sort expression based on the descending order of the column, and null values</span> |
| <span class="sd"> appear after non-null values.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="sd"> >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()</span> |
| <span class="sd"> [Row(name='Tom'), Row(name='Alice'), Row(name=None)]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.isNull"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.isNull.html#pyspark.sql.Column.isNull">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">isNull</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> True if the current expression is null.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)])</span> |
| <span class="sd"> >>> df.filter(df.height.isNull()).collect()</span> |
| <span class="sd"> [Row(name='Alice', height=None)]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.isNotNull"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.isNotNull.html#pyspark.sql.Column.isNotNull">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">isNotNull</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> True if the current expression is NOT null.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)])</span> |
| <span class="sd"> >>> df.filter(df.height.isNotNull()).collect()</span> |
| <span class="sd"> [Row(name='Tom', height=80)]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.isNaN"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.isNaN.html#pyspark.sql.Column.isNaN">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">isNaN</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> True if the current expression is NaN.</span> |
| |
| <span class="sd"> .. versionadded:: 4.0.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [Row(name='Tom', height=80.0), Row(name='Alice', height=float('nan'))])</span> |
| <span class="sd"> >>> df.filter(df.height.isNaN()).collect()</span> |
| <span class="sd"> [Row(name='Alice', height=nan)]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.alias"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.alias.html#pyspark.sql.Column.alias">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">alias</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns this column aliased with a new name or names (in the case of expressions that</span> |
| <span class="sd"> return more than one column, such as explode).</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> alias : str</span> |
| <span class="sd"> desired column names (collects all positional arguments passed)</span> |
| |
| <span class="sd"> Other Parameters</span> |
| <span class="sd"> ----------------</span> |
| <span class="sd"> metadata: dict</span> |
| <span class="sd"> a dict of information to be stored in ``metadata`` attribute of the</span> |
| <span class="sd"> corresponding :class:`StructField <pyspark.sql.types.StructField>` (optional, keyword</span> |
| <span class="sd"> only argument)</span> |
| |
| <span class="sd"> .. versionchanged:: 2.2.0</span> |
| <span class="sd"> Added optional ``metadata`` argument.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is aliased with new name or names.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.age.alias("age2")).collect()</span> |
| <span class="sd"> [Row(age2=2), Row(age2=5)]</span> |
| <span class="sd"> >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max']</span> |
| <span class="sd"> 99</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.name"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.name.html#pyspark.sql.Column.name">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">alias</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> :func:`name` is an alias for :func:`alias`.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.cast"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.cast.html#pyspark.sql.Column.cast">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">cast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataType</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataType</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Casts the column into type ``dataType``.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataType : :class:`DataType` or str</span> |
| <span class="sd"> a DataType or Python string literal with a DDL-formatted string</span> |
| <span class="sd"> to use when parsing the column to the same type.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is cast into new type.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql.types import StringType</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.age.cast("string").alias('ages')).collect()</span> |
| <span class="sd"> [Row(ages='2'), Row(ages='5')]</span> |
| <span class="sd"> >>> df.select(df.age.cast(StringType()).alias('ages')).collect()</span> |
| <span class="sd"> [Row(ages='2'), Row(ages='5')]</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.try_cast"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.try_cast.html#pyspark.sql.Column.try_cast">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">try_cast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataType</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataType</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> This is a special version of `cast` that performs the same operation, but returns a NULL</span> |
| <span class="sd"> value instead of raising an error if the invoke method throws exception.</span> |
| |
| <span class="sd"> .. versionadded:: 4.0.0</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataType : :class:`DataType` or str</span> |
| <span class="sd"> a DataType or Python string literal with a DDL-formatted string</span> |
| <span class="sd"> to use when parsing the column to the same type.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is cast into new type.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Example 1: Cast with a Datatype</span> |
| |
| <span class="sd"> >>> from pyspark.sql.types import LongType</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "123"), (5, "Bob"), (3, None)], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name.try_cast(LongType())).show()</span> |
| <span class="sd"> +----+</span> |
| <span class="sd"> |name|</span> |
| <span class="sd"> +----+</span> |
| <span class="sd"> | 123|</span> |
| <span class="sd"> |NULL|</span> |
| <span class="sd"> |NULL|</span> |
| <span class="sd"> +----+</span> |
| |
| <span class="sd"> Example 2: Cast with a DDL string</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "123"), (5, "Bob"), (3, None)], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name.try_cast("double")).show()</span> |
| <span class="sd"> +-----+</span> |
| <span class="sd"> | name|</span> |
| <span class="sd"> +-----+</span> |
| <span class="sd"> |123.0|</span> |
| <span class="sd"> | NULL|</span> |
| <span class="sd"> | NULL|</span> |
| <span class="sd"> +-----+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.astype"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.astype.html#pyspark.sql.Column.astype">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">astype</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataType</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataType</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> :func:`astype` is an alias for :func:`cast`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.between"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.between.html#pyspark.sql.Column.between">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">between</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">lowerBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">],</span> |
| <span class="n">upperBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">],</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Check if the current column's values are between the specified lower and upper</span> |
| <span class="sd"> bounds, inclusive.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> lowerBound : :class:`Column`, int, float, string, bool, datetime, date or Decimal</span> |
| <span class="sd"> The lower boundary value, inclusive.</span> |
| <span class="sd"> upperBound : :class:`Column`, int, float, string, bool, datetime, date or Decimal</span> |
| <span class="sd"> The upper boundary value, inclusive.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> A new column of boolean values indicating whether each element in the original</span> |
| <span class="sd"> column is within the specified range (inclusive).</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Using between with integer values.</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, df.age.between(2, 4)).show()</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| <span class="sd"> | name|((age >= 2) AND (age <= 4))|</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| |
| <span class="sd"> Using between with string values.</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame([("Alice", "A"), ("Bob", "B")], ["name", "initial"])</span> |
| <span class="sd"> >>> df.select(df.name, df.initial.between("A", "B")).show()</span> |
| <span class="sd"> +-----+-----------------------------------+</span> |
| <span class="sd"> | name|((initial >= A) AND (initial <= B))|</span> |
| <span class="sd"> +-----+-----------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| true|</span> |
| <span class="sd"> +-----+-----------------------------------+</span> |
| |
| <span class="sd"> Using between with float values.</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2.5, "Alice"), (5.5, "Bob")], ["height", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, df.height.between(2.0, 5.0)).show()</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> | name|((height >= 2.0) AND (height <= 5.0))|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| |
| <span class="sd"> Using between with date values.</span> |
| |
| <span class="sd"> >>> import pyspark.sql.functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [("Alice", "2023-01-01"), ("Bob", "2023-02-01")], ["name", "date"])</span> |
| <span class="sd"> >>> df = df.withColumn("date", sf.to_date(df.date))</span> |
| <span class="sd"> >>> df.select(df.name, df.date.between("2023-01-01", "2023-01-15")).show()</span> |
| <span class="sd"> +-----+-----------------------------------------------+</span> |
| <span class="sd"> | name|((date >= 2023-01-01) AND (date <= 2023-01-15))|</span> |
| <span class="sd"> +-----+-----------------------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+-----------------------------------------------+</span> |
| <span class="sd"> >>> from datetime import date</span> |
| <span class="sd"> >>> df.select(df.name, df.date.between(date(2023, 1, 1), date(2023, 1, 15))).show()</span> |
| <span class="sd"> +-----+-------------------------------------------------------------+</span> |
| <span class="sd"> | name|((date >= DATE '2023-01-01') AND (date <= DATE '2023-01-15'))|</span> |
| <span class="sd"> +-----+-------------------------------------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+-------------------------------------------------------------+</span> |
| |
| <span class="sd"> Using between with timestamp values.</span> |
| |
| <span class="sd"> >>> import pyspark.sql.functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [("Alice", "2023-01-01 10:00:00"), ("Bob", "2023-02-01 10:00:00")],</span> |
| <span class="sd"> ... schema=["name", "timestamp"])</span> |
| <span class="sd"> >>> df = df.withColumn("timestamp", sf.to_timestamp(df.timestamp))</span> |
| <span class="sd"> >>> df.select(df.name, df.timestamp.between("2023-01-01", "2023-02-01")).show()</span> |
| <span class="sd"> +-----+---------------------------------------------------------+</span> |
| <span class="sd"> | name|((timestamp >= 2023-01-01) AND (timestamp <= 2023-02-01))|</span> |
| <span class="sd"> +-----+---------------------------------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+---------------------------------------------------------+</span> |
| <span class="sd"> >>> df.select(df.name, df.timestamp.between("2023-01-01", "2023-02-01 12:00:00")).show()</span> |
| <span class="sd"> +-----+------------------------------------------------------------------+</span> |
| <span class="sd"> | name|((timestamp >= 2023-01-01) AND (timestamp <= 2023-02-01 12:00:00))|</span> |
| <span class="sd"> +-----+------------------------------------------------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| true|</span> |
| <span class="sd"> +-----+------------------------------------------------------------------+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.when"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.when.html#pyspark.sql.Column.when">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">when</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">condition</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates a list of conditions and returns one of multiple possible result expressions.</span> |
| <span class="sd"> If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> condition : :class:`Column`</span> |
| <span class="sd"> a boolean :class:`Column` expression.</span> |
| <span class="sd"> value</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is in conditions.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Example 1: Using :func:`when` with conditions and values to create a new Column</span> |
| |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> result = df.select(df.name, sf.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0))</span> |
| <span class="sd"> >>> result.show()</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| <span class="sd"> | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0 END|</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| <span class="sd"> |Alice| -1|</span> |
| <span class="sd"> | Bob| 1|</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| |
| <span class="sd"> Example 2: Chaining multiple :func:`when` conditions</span> |
| |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame([(1, "Alice"), (4, "Bob"), (6, "Charlie")], ["age", "name"])</span> |
| <span class="sd"> >>> result = df.select(</span> |
| <span class="sd"> ... df.name,</span> |
| <span class="sd"> ... sf.when(df.age < 3, "Young").when(df.age < 5, "Middle-aged").otherwise("Old")</span> |
| <span class="sd"> ... )</span> |
| <span class="sd"> >>> result.show()</span> |
| <span class="sd"> +-------+---------------------------------------------------------------------------+</span> |
| <span class="sd"> | name|CASE WHEN (age < 3) THEN Young WHEN (age < 5) THEN Middle-aged ELSE Old END|</span> |
| <span class="sd"> +-------+---------------------------------------------------------------------------+</span> |
| <span class="sd"> | Alice| Young|</span> |
| <span class="sd"> | Bob| Middle-aged|</span> |
| <span class="sd"> |Charlie| Old|</span> |
| <span class="sd"> +-------+---------------------------------------------------------------------------+</span> |
| |
| <span class="sd"> Example 3: Using literal values as conditions</span> |
| |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> result = df.select(</span> |
| <span class="sd"> ... df.name, sf.when(sf.lit(True), 1).otherwise(</span> |
| <span class="sd"> ... sf.raise_error("unreachable")).alias("when"))</span> |
| <span class="sd"> >>> result.show()</span> |
| <span class="sd"> +-----+----+</span> |
| <span class="sd"> | name|when|</span> |
| <span class="sd"> +-----+----+</span> |
| <span class="sd"> |Alice| 1|</span> |
| <span class="sd"> | Bob| 1|</span> |
| <span class="sd"> +-----+----+</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.functions.when</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.otherwise"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.otherwise.html#pyspark.sql.Column.otherwise">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates a list of conditions and returns one of multiple possible result expressions.</span> |
| <span class="sd"> If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> value</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column is unmatched conditions.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, sf.when(df.age > 3, 1).otherwise(0)).show()</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> | name|CASE WHEN (age > 3) THEN 1 ELSE 0 END|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> |Alice| 0|</span> |
| <span class="sd"> | Bob| 1|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.functions.when</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <div class="viewcode-block" id="Column.over"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.over.html#pyspark.sql.Column.over">[docs]</a> <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">over</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window</span><span class="p">:</span> <span class="s2">"WindowSpec"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Define a windowing column.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> window : :class:`WindowSpec`</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Window</span> |
| <span class="sd"> >>> window = (</span> |
| <span class="sd"> ... Window.partitionBy("name")</span> |
| <span class="sd"> ... .orderBy("age")</span> |
| <span class="sd"> ... .rowsBetween(Window.unboundedPreceding, Window.currentRow)</span> |
| <span class="sd"> ... )</span> |
| <span class="sd"> >>> from pyspark.sql.functions import rank, min, desc</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.withColumn(</span> |
| <span class="sd"> ... "rank", rank().over(window)</span> |
| <span class="sd"> ... ).withColumn(</span> |
| <span class="sd"> ... "min", min('age').over(window)</span> |
| <span class="sd"> ... ).sort(desc("age")).show()</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> |age| name|rank|min|</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> | 5| Bob| 1| 5|</span> |
| <span class="sd"> | 2|Alice| 1| 2|</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> """</span> |
| <span class="o">...</span></div> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="nf">__nonzero__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__bool__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@dispatch_col_method</span> |
| <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="o">...</span></div> |
| |
| |
| <span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="kn">import</span> <span class="nn">doctest</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span> |
| <span class="kn">import</span> <span class="nn">pyspark.sql.column</span> |
| |
| <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> |
| <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[4]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"sql.column tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"spark"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span> |
| |
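| <span class="c1"># Run every doctest in the module; ELLIPSIS and NORMALIZE_WHITESPACE keep the</span> |
| <span class="c1"># rendered table comparisons robust, and REPORT_NDIFF gives readable failure diffs.</span> |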
| <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span> |
| <span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> |
| <span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> |
| <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">REPORT_NDIFF</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> |
| <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> |
| <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| |
| |
| <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> |
| <span class="n">_test</span><span class="p">()</span> |
| </pre></div> |
| |
| </article> |
| |
| |
| |
| <footer class="bd-footer-article"> |
| |
| <div class="footer-article-items footer-article__inner"> |
| |
| <div class="footer-article-item"><!-- Previous / next buttons --> |
| <div class="prev-next-area"> |
| </div></div> |
| |
| </div> |
| |
| </footer> |
| |
| </div> |
| |
| |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> |
| <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"><p class="copyright"> |
| Copyright © 2024 The Apache Software Foundation, licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. |
| </p></div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"><p class="theme-version"> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |