| |
| <!DOCTYPE html> |
| |
<html lang="en">
| <head> |
| <meta charset="utf-8" /> |
| <title>pyspark.sql.column — PySpark 3.5.5 documentation</title> |
| |
| <link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| <link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| |
| |
| <link rel="stylesheet" |
| href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| |
| |
| |
| |
| <link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" /> |
| |
| <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"> |
| |
| <script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script> |
| <script src="../../../_static/jquery.js"></script> |
| <script src="../../../_static/underscore.js"></script> |
| <script src="../../../_static/doctools.js"></script> |
| <script src="../../../_static/language_data.js"></script> |
| <script src="../../../_static/clipboard.min.js"></script> |
| <script src="../../../_static/copybutton.js"></script> |
| <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> |
<script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
| <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script> |
| <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/sql/column.html" /> |
| <link rel="search" title="Search" href="../../../search.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
<meta name="docsearch:language" content="en">
| |
| |
| <!-- Google Analytics --> |
| |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <div class="container-fluid" id="banner"></div> |
| |
| |
| <nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl"> |
| |
| <div id="navbar-start"> |
| |
| |
| |
| <a class="navbar-brand" href="../../../index.html"> |
| <img src="../../../_static/spark-logo-reverse.png" class="logo" alt="logo"> |
| </a> |
| |
| |
| |
| </div> |
| |
| <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-collapsible" aria-controls="navbar-collapsible" aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| |
| |
| <div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse"> |
| <div id="navbar-center" class="mr-auto"> |
| |
| <div class="navbar-center-item"> |
| <ul id="navbar-main-elements" class="navbar-nav"> |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../index.html"> |
| Overview |
| </a> |
| </li> |
| |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../getting_started/index.html"> |
| Getting Started |
| </a> |
| </li> |
| |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../user_guide/index.html"> |
| User Guides |
| </a> |
| </li> |
| |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../reference/index.html"> |
| API Reference |
| </a> |
| </li> |
| |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../development/index.html"> |
| Development |
| </a> |
| </li> |
| |
| <li class="toctree-l1 nav-item"> |
| <a class="reference internal nav-link" href="../../../migration_guide/index.html"> |
| Migration Guides |
| </a> |
| </li> |
| |
| |
| </ul> |
| </div> |
| |
| </div> |
| |
| <div id="navbar-end"> |
| |
| <div class="navbar-end-item"> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> |
| 3.5.5 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Function to construct the target URL from the JSON components |
| function buildURL(entry) { |
| var template = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja |
| template = template.replace("{version}", entry.version); |
| return template; |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version |
| function checkPageExistsAndRedirect(event) { |
| const currentFilePath = "_modules/pyspark/sql/column.html", |
| otherDocsHomepage = event.target.getAttribute("href"); |
| let tryUrl = `${otherDocsHomepage}${currentFilePath}`; |
| $.ajax({ |
| type: 'HEAD', |
| url: tryUrl, |
| // if the page exists, go there |
| success: function() { |
| location.href = tryUrl; |
| } |
| }).fail(function() { |
| location.href = otherDocsHomepage; |
| }); |
| return false; |
| } |
| |
    // Populate the version switcher dropdown (#version_switcher) on page load.
    // Runs immediately as an IIFE; if the versions.json fetch fails, the
    // dropdown is simply left empty (no error handler is attached).
    (function () {
      // get JSON config listing all published doc versions
      $.getJSON("https://spark.apache.org/static/versions.json", function(data, textStatus, jqXHR) {
        // create the nodes first (before AJAX calls) to ensure the order is
        // correct (for now, links will go to doc version homepage)
        $.each(data, function(index, entry) {
          // if no custom name specified (e.g., "latest"), use version string
          if (!("name" in entry)) {
            entry.name = entry.version;
          }
          // construct the appropriate URL, and add it to the dropdown
          entry.url = buildURL(entry);
          const node = document.createElement("a");
          node.setAttribute("class", "list-group-item list-group-item-action py-1");
          node.setAttribute("href", `${entry.url}`);
          node.textContent = `${entry.name}`;
          // onclick returns false, so the href above is only a fallback;
          // actual navigation is decided by checkPageExistsAndRedirect.
          node.onclick = checkPageExistsAndRedirect;
          $("#version_switcher").append(node);
        });
      });
    })();
| </script> |
| </div> |
| |
| </div> |
| </div> |
| </div> |
| </nav> |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| |
| <!-- Only show if we have sidebars configured, else just a small margin --> |
| <div class="col-12 col-md-3 bd-sidebar"> |
| <div class="sidebar-start-items"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get"> |
| <i class="icon fas fa-search"></i> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" > |
| </form><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| <div class="bd-toc-item active"> |
| |
| </div> |
| </nav> |
| </div> |
| <div class="sidebar-end-items"> |
| </div> |
| </div> |
| |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <h1>Source code for pyspark.sql.column</h1><div class="highlight"><pre> |
| <span></span><span class="c1">#</span> |
| <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> |
| <span class="c1"># contributor license agreements. See the NOTICE file distributed with</span> |
| <span class="c1"># this work for additional information regarding copyright ownership.</span> |
| <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> |
| <span class="c1"># (the "License"); you may not use this file except in compliance with</span> |
| <span class="c1"># the License. You may obtain a copy of the License at</span> |
| <span class="c1">#</span> |
| <span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="c1">#</span> |
| <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> |
| <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="c1"># See the License for the specific language governing permissions and</span> |
| <span class="c1"># limitations under the License.</span> |
| <span class="c1">#</span> |
| |
| <span class="kn">import</span><span class="w"> </span><span class="nn">sys</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">json</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">warnings</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span> |
| <span class="n">cast</span><span class="p">,</span> |
| <span class="n">overload</span><span class="p">,</span> |
| <span class="n">Any</span><span class="p">,</span> |
| <span class="n">Callable</span><span class="p">,</span> |
| <span class="n">Iterable</span><span class="p">,</span> |
| <span class="n">List</span><span class="p">,</span> |
| <span class="n">Optional</span><span class="p">,</span> |
| <span class="n">Tuple</span><span class="p">,</span> |
| <span class="n">TYPE_CHECKING</span><span class="p">,</span> |
| <span class="n">Union</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="kn">from</span><span class="w"> </span><span class="nn">py4j.java_gateway</span><span class="w"> </span><span class="kn">import</span> <span class="n">JavaObject</span><span class="p">,</span> <span class="n">JVMView</span> |
| |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark</span><span class="w"> </span><span class="kn">import</span> <span class="n">copy_func</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.context</span><span class="w"> </span><span class="kn">import</span> <span class="n">SparkContext</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.errors</span><span class="w"> </span><span class="kn">import</span> <span class="n">PySparkAttributeError</span><span class="p">,</span> <span class="n">PySparkTypeError</span><span class="p">,</span> <span class="n">PySparkValueError</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql.types</span><span class="w"> </span><span class="kn">import</span> <span class="n">DataType</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql.utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_active_spark_context</span> |
| |
| <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql._typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">ColumnOrName</span><span class="p">,</span> <span class="n">LiteralType</span><span class="p">,</span> <span class="n">DecimalLiteral</span><span class="p">,</span> <span class="n">DateTimeLiteral</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql.window</span><span class="w"> </span><span class="kn">import</span> <span class="n">WindowSpec</span> |
| |
| <span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"Column"</span><span class="p">]</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_create_column_from_literal</span><span class="p">(</span><span class="n">literal</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">JVMView</span><span class="p">,</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="p">)</span><span class="o">.</span><span class="n">functions</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">literal</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_create_column_from_name</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">JVMView</span><span class="p">,</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="p">)</span><span class="o">.</span><span class="n">functions</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_to_java_column</span><span class="p">(</span><span class="n">col</span><span class="p">:</span> <span class="s2">"ColumnOrName"</span><span class="p">)</span> <span class="o">-></span> <span class="n">JavaObject</span><span class="p">:</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="n">jcol</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">_jc</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> |
| <span class="n">jcol</span> <span class="o">=</span> <span class="n">_create_column_from_name</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_COLUMN_OR_STR"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"col"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="n">jcol</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_to_java_expr</span><span class="p">(</span><span class="n">col</span><span class="p">:</span> <span class="s2">"ColumnOrName"</span><span class="p">)</span> <span class="o">-></span> <span class="n">JavaObject</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">_to_java_column</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="n">expr</span><span class="p">()</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_to_seq</span><span class="p">(</span> |
| <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> |
| <span class="n">cols</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="s2">"ColumnOrName"</span><span class="p">],</span> |
| <span class="n">converter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="s2">"ColumnOrName"</span><span class="p">],</span> <span class="n">JavaObject</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">JavaObject</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Convert a list of Columns (or names) into a JVM Seq of Column.</span> |
| |
| <span class="sd"> An optional `converter` could be used to convert items in `cols`</span> |
| <span class="sd"> into JVM Column objects.</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="n">converter</span><span class="p">:</span> |
| <span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">converter</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">]</span> |
| <span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <span class="k">return</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toSeq</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_to_list</span><span class="p">(</span> |
| <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> |
| <span class="n">cols</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="s2">"ColumnOrName"</span><span class="p">],</span> |
| <span class="n">converter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="s2">"ColumnOrName"</span><span class="p">],</span> <span class="n">JavaObject</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">JavaObject</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Convert a list of Columns (or names) into a JVM (Scala) List of Columns.</span> |
| |
| <span class="sd"> An optional `converter` could be used to convert items in `cols`</span> |
| <span class="sd"> into JVM Column objects.</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="n">converter</span><span class="p">:</span> |
| <span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">converter</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">]</span> |
| <span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <span class="k">return</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">PythonUtils</span><span class="o">.</span><span class="n">toList</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_unary_op</span><span class="p">(</span> |
| <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> |
| <span class="n">doc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"unary operator"</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">],</span> <span class="s2">"Column"</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""Create a method for given unary operator"""</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="n">name</span><span class="p">)()</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span> |
| |
| <span class="n">_</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span> |
| <span class="k">return</span> <span class="n">_</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_func_op</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">doc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">],</span> <span class="s2">"Column"</span><span class="p">]:</span> |
| <span class="k">def</span><span class="w"> </span><span class="nf">_</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">JVMView</span><span class="p">,</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="p">)</span><span class="o">.</span><span class="n">functions</span><span class="p">,</span> <span class="n">name</span><span class="p">)(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span> |
| |
| <span class="n">_</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span> |
| <span class="k">return</span> <span class="n">_</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_bin_func_op</span><span class="p">(</span> |
| <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> |
| <span class="n">reverse</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">doc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"binary function"</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">]:</span> |
| <span class="k">def</span><span class="w"> </span><span class="nf">_</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="n">fn</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">JVMView</span><span class="p">,</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="p">)</span><span class="o">.</span><span class="n">functions</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">_jc</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Column</span><span class="p">)</span> <span class="k">else</span> <span class="n">_create_column_from_literal</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> |
| <span class="n">njc</span> <span class="o">=</span> <span class="n">fn</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="n">jc</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">reverse</span> <span class="k">else</span> <span class="n">fn</span><span class="p">(</span><span class="n">jc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">njc</span><span class="p">)</span> |
| |
| <span class="n">_</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span> |
| <span class="k">return</span> <span class="n">_</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_bin_op</span><span class="p">(</span> |
| <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> |
| <span class="n">doc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"binary operator"</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[</span> |
| <span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span> |
| <span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""Create a method for given binary operator"""</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> |
| <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">],</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">_jc</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Column</span><span class="p">)</span> <span class="k">else</span> <span class="n">other</span> |
| <span class="n">njc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="n">name</span><span class="p">)(</span><span class="n">jc</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">njc</span><span class="p">)</span> |
| |
| <span class="n">_</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span> |
| <span class="k">return</span> <span class="n">_</span> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_reverse_op</span><span class="p">(</span> |
| <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> |
| <span class="n">doc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"binary operator"</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""Create a method for binary operator (this object is on right side)"""</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="n">jother</span> <span class="o">=</span> <span class="n">_create_column_from_literal</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">jother</span><span class="p">,</span> <span class="n">name</span><span class="p">)(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span> |
| |
| <span class="n">_</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">doc</span> |
| <span class="k">return</span> <span class="n">_</span> |
| |
| |
| <div class="viewcode-block" id="Column"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.html#pyspark.sql.Column">[docs]</a><span class="k">class</span><span class="w"> </span><span class="nc">Column</span><span class="p">:</span> |
| |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> A column in a DataFrame.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Column instances can be created by</span> |
| |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| |
| <span class="sd"> Select a column out of a DataFrame</span> |
| <span class="sd"> >>> df.name</span> |
| <span class="sd"> Column<'name'></span> |
| <span class="sd"> >>> df["name"]</span> |
| <span class="sd"> Column<'name'></span> |
| |
| <span class="sd"> Create from an expression</span> |
| |
| <span class="sd"> >>> df.age + 1</span> |
| <span class="sd"> Column<...></span> |
| <span class="sd"> >>> 1 / df.age</span> |
| <span class="sd"> Column<...></span> |
| <span class="sd"> """</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">jc</span><span class="p">:</span> <span class="n">JavaObject</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span> <span class="o">=</span> <span class="n">jc</span> |
| |
| <span class="c1"># arithmetic operators</span> |
| <span class="fm">__neg__</span> <span class="o">=</span> <span class="n">_func_op</span><span class="p">(</span><span class="s2">"negate"</span><span class="p">)</span> |
| <span class="fm">__add__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"plus"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="fm">__sub__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"minus"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="fm">__mul__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"multiply"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="n">__div__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"divide"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="fm">__truediv__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"divide"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="fm">__mod__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"mod"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="fm">__radd__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"plus"</span><span class="p">)</span> |
| <span class="p">)</span> |
| <span class="fm">__rsub__</span> <span class="o">=</span> <span class="n">_reverse_op</span><span class="p">(</span><span class="s2">"minus"</span><span class="p">)</span> |
| <span class="fm">__rmul__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"multiply"</span><span class="p">)</span> |
| <span class="p">)</span> |
| <span class="n">__rdiv__</span> <span class="o">=</span> <span class="n">_reverse_op</span><span class="p">(</span><span class="s2">"divide"</span><span class="p">)</span> |
| <span class="fm">__rtruediv__</span> <span class="o">=</span> <span class="n">_reverse_op</span><span class="p">(</span><span class="s2">"divide"</span><span class="p">)</span> |
| <span class="fm">__rmod__</span> <span class="o">=</span> <span class="n">_reverse_op</span><span class="p">(</span><span class="s2">"mod"</span><span class="p">)</span> |
| |
| <span class="fm">__pow__</span> <span class="o">=</span> <span class="n">_bin_func_op</span><span class="p">(</span><span class="s2">"pow"</span><span class="p">)</span> |
| <span class="fm">__rpow__</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">]],</span> <span class="s2">"Column"</span><span class="p">],</span> |
| <span class="n">_bin_func_op</span><span class="p">(</span><span class="s2">"pow"</span><span class="p">,</span> <span class="n">reverse</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <span class="p">)</span> |
| |
| <span class="c1"># logical operators</span> |
| <span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span> <span class="c1"># type: ignore[override]</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">],</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""binary function"""</span> |
| <span class="k">return</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"equalTo"</span><span class="p">)(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="fm">__ne__</span><span class="p">(</span> <span class="c1"># type: ignore[override]</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""binary function"""</span> |
| <span class="k">return</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"notEqual"</span><span class="p">)(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="fm">__lt__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"lt"</span><span class="p">)</span> |
| <span class="fm">__le__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"leq"</span><span class="p">)</span> |
| <span class="fm">__ge__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"geq"</span><span class="p">)</span> |
| <span class="fm">__gt__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"gt"</span><span class="p">)</span> |
| |
| <span class="n">_eqNullSafe_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Equality test that is safe for null values.</span> |
| |
| <span class="s2"> .. versionadded:: 2.3.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other</span> |
| <span class="s2"> a value or :class:`Column`</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df1 = spark.createDataFrame([</span> |
| <span class="s2"> ... Row(id=1, value='foo'),</span> |
| <span class="s2"> ... Row(id=2, value=None)</span> |
| <span class="s2"> ... ])</span> |
| <span class="s2"> >>> df1.select(</span> |
| <span class="s2"> ... df1['value'] == 'foo',</span> |
| <span class="s2"> ... df1['value'].eqNullSafe('foo'),</span> |
| <span class="s2"> ... df1['value'].eqNullSafe(None)</span> |
| <span class="s2"> ... ).show()</span> |
| <span class="s2"> +-------------+---------------+----------------+</span> |
| <span class="s2"> |(value = foo)|(value <=> foo)|(value <=> NULL)|</span> |
| <span class="s2"> +-------------+---------------+----------------+</span> |
| <span class="s2"> | true| true| false|</span> |
| <span class="s2"> | NULL| false| true|</span> |
| <span class="s2"> +-------------+---------------+----------------+</span> |
| <span class="s2"> >>> df2 = spark.createDataFrame([</span> |
| <span class="s2"> ... Row(value = 'bar'),</span> |
| <span class="s2"> ... Row(value = None)</span> |
| <span class="s2"> ... ])</span> |
| <span class="s2"> >>> df1.join(df2, df1["value"] == df2["value"]).count()</span> |
| <span class="s2"> 0</span> |
| <span class="s2"> >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count()</span> |
| <span class="s2"> 1</span> |
| <span class="s2"> >>> df2 = spark.createDataFrame([</span> |
| <span class="s2"> ... Row(id=1, value=float('NaN')),</span> |
| <span class="s2"> ... Row(id=2, value=42.0),</span> |
| <span class="s2"> ... Row(id=3, value=None)</span> |
| <span class="s2"> ... ])</span> |
| <span class="s2"> >>> df2.select(</span> |
| <span class="s2"> ... df2['value'].eqNullSafe(None),</span> |
| <span class="s2"> ... df2['value'].eqNullSafe(float('NaN')),</span> |
| <span class="s2"> ... df2['value'].eqNullSafe(42.0)</span> |
| <span class="s2"> ... ).show()</span> |
| <span class="s2"> +----------------+---------------+----------------+</span> |
| <span class="s2"> |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)|</span> |
| <span class="s2"> +----------------+---------------+----------------+</span> |
| <span class="s2"> | false| true| false|</span> |
| <span class="s2"> | false| false| true|</span> |
| <span class="s2"> | true| false| false|</span> |
| <span class="s2"> +----------------+---------------+----------------+</span> |
| |
| <span class="s2"> Notes</span> |
| <span class="s2"> -----</span> |
| <span class="s2"> Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the</span> |
| <span class="s2"> `NaN Semantics <https://spark.apache.org/docs/latest/sql-ref-datatypes.html#nan-semantics>`_</span> |
| <span class="s2"> for details.</span> |
| <span class="s2"> """</span> |
| <span class="n">eqNullSafe</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"eqNullSafe"</span><span class="p">,</span> <span class="n">_eqNullSafe_doc</span><span class="p">)</span> |
| |
| <span class="c1"># `and`, `or`, `not` cannot be overloaded in Python,</span> |
| <span class="c1"># so use bitwise operators as boolean operators</span> |
| <span class="fm">__and__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"and"</span><span class="p">)</span> |
| <span class="fm">__or__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"or"</span><span class="p">)</span> |
| <span class="fm">__invert__</span> <span class="o">=</span> <span class="n">_func_op</span><span class="p">(</span><span class="s2">"not"</span><span class="p">)</span> |
| <span class="fm">__rand__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"and"</span><span class="p">)</span> |
| <span class="fm">__ror__</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"or"</span><span class="p">)</span> |
| |
| <span class="c1"># container operators</span> |
| <span class="k">def</span><span class="w"> </span><span class="fm">__contains__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"CANNOT_APPLY_IN_FOR_COLUMN"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{},</span> |
| <span class="p">)</span> |
| |
| <span class="c1"># bitwise operators</span> |
| <span class="n">_bitwiseOR_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Compute bitwise OR of this expression with another expression.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other</span> |
| <span class="s2"> a value or :class:`Column` to calculate bitwise or(|) with</span> |
| <span class="s2"> this :class:`Column`.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([Row(a=170, b=75)])</span> |
| <span class="s2"> >>> df.select(df.a.bitwiseOR(df.b)).collect()</span> |
| <span class="s2"> [Row((a | b)=235)]</span> |
| <span class="s2"> """</span> |
| <span class="n">_bitwiseAND_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Compute bitwise AND of this expression with another expression.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other</span> |
| <span class="s2"> a value or :class:`Column` to calculate bitwise and(&) with</span> |
| <span class="s2"> this :class:`Column`.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([Row(a=170, b=75)])</span> |
| <span class="s2"> >>> df.select(df.a.bitwiseAND(df.b)).collect()</span> |
| <span class="s2"> [Row((a & b)=10)]</span> |
| <span class="s2"> """</span> |
| <span class="n">_bitwiseXOR_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Compute bitwise XOR of this expression with another expression.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other</span> |
| <span class="s2"> a value or :class:`Column` to calculate bitwise xor(^) with</span> |
| <span class="s2"> this :class:`Column`.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([Row(a=170, b=75)])</span> |
| <span class="s2"> >>> df.select(df.a.bitwiseXOR(df.b)).collect()</span> |
| <span class="s2"> [Row((a ^ b)=225)]</span> |
| <span class="s2"> """</span> |
| |
| <span class="n">bitwiseOR</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"bitwiseOR"</span><span class="p">,</span> <span class="n">_bitwiseOR_doc</span><span class="p">)</span> |
| <span class="n">bitwiseAND</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"bitwiseAND"</span><span class="p">,</span> <span class="n">_bitwiseAND_doc</span><span class="p">)</span> |
| <span class="n">bitwiseXOR</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"bitwiseXOR"</span><span class="p">,</span> <span class="n">_bitwiseXOR_doc</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="Column.getItem"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.getItem.html#pyspark.sql.Column.getItem">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">getItem</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets an item at position ``key`` out of a list,</span> |
| <span class="sd"> or gets an item by key out of a dict.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> key</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| <span class="sd"> The result will only be true at a location if the item matches in the column.</span> |
| |
| <span class="sd"> .. deprecated:: 3.0.0</span> |
| <span class="sd"> :class:`Column` as a parameter is deprecated.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the item(s) got at position out of a list or by key out of a dict.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])</span> |
| <span class="sd"> >>> df.select(df.l.getItem(0), df.d.getItem("key")).show()</span> |
| <span class="sd"> +----+------+</span> |
| <span class="sd"> |l[0]|d[key]|</span> |
| <span class="sd"> +----+------+</span> |
| <span class="sd"> | 1| value|</span> |
| <span class="sd"> +----+------+</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> |
| <span class="s2">"A column as 'key' in getItem is deprecated as of Spark 3.0, and will not "</span> |
| <span class="s2">"be supported in the future release. Use `column[key]` or `column.key` syntax "</span> |
| <span class="s2">"instead."</span><span class="p">,</span> |
| <span class="ne">FutureWarning</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">key</span><span class="p">]</span></div> |
| |
| <div class="viewcode-block" id="Column.getField"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.getField.html#pyspark.sql.Column.getField">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">getField</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets a field by name in a :class:`StructType`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> name</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| <span class="sd"> The result will only be true at a location if the field matches in the Column.</span> |
| |
| <span class="sd"> .. deprecated:: 3.0.0</span> |
| <span class="sd"> :class:`Column` as a parameter is deprecated.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column got by name.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))])</span> |
| <span class="sd"> >>> df.select(df.r.getField("b")).show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> |r.b|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | b|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> >>> df.select(df.r.a).show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> |r.a|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | 1|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> |
| <span class="s2">"A column as 'name' in getField is deprecated as of Spark 3.0, and will not "</span> |
| <span class="s2">"be supported in the future release. Use `column[name]` or `column.name` syntax "</span> |
| <span class="s2">"instead."</span><span class="p">,</span> |
| <span class="ne">FutureWarning</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">name</span><span class="p">]</span></div> |
| |
| <div class="viewcode-block" id="Column.withField"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.withField.html#pyspark.sql.Column.withField">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">withField</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fieldName</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">col</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that adds/replaces a field in :class:`StructType` by name.</span> |
| |
| <span class="sd"> .. versionadded:: 3.1.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> fieldName : str</span> |
| <span class="sd"> a literal value.</span> |
| <span class="sd"> The result will only be true at a location if any field matches in the Column.</span> |
| <span class="sd"> col : :class:`Column`</span> |
| <span class="sd"> A :class:`Column` expression for the column with `fieldName`.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing whether each element of Column</span> |
| <span class="sd"> which field was added/replaced by fieldName.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> from pyspark.sql.functions import lit</span> |
| <span class="sd"> >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))])</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].withField('b', lit(3))).select('a.b').show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | b|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | 3|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].withField('d', lit(4))).select('a.d').show()</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | d|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> | 4|</span> |
| <span class="sd"> +---+</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">fieldName</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_STR"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"fieldName"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">fieldName</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_COLUMN"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"col"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">withField</span><span class="p">(</span><span class="n">fieldName</span><span class="p">,</span> <span class="n">col</span><span class="o">.</span><span class="n">_jc</span><span class="p">))</span></div> |
| |
| <div class="viewcode-block" id="Column.dropFields"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.dropFields.html#pyspark.sql.Column.dropFields">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">dropFields</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">fieldNames</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that drops fields in :class:`StructType` by name.</span> |
| <span class="sd"> This is a no-op if the schema doesn't contain field name(s).</span> |
| |
| <span class="sd"> .. versionadded:: 3.1.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> fieldNames : str</span> |
| <span class="sd"> Desired field names (collects all positional arguments passed).</span> |
| <span class="sd"> Any field whose name matches is dropped from the struct.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the struct value with the named field(s) dropped.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Row</span> |
| <span class="sd"> >>> from pyspark.sql.functions import col, lit</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... Row(a=Row(b=1, c=2, d=3, e=Row(f=4, g=5, h=6)))])</span> |
| <span class="sd"> >>> df.withColumn('a', df['a'].dropFields('b')).show()</span> |
| <span class="sd"> +-----------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +-----------------+</span> |
| <span class="sd"> |{2, 3, {4, 5, 6}}|</span> |
| <span class="sd"> +-----------------+</span> |
| |
| <span class="sd"> >>> df.withColumn('a', df['a'].dropFields('b', 'c')).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{3, {4, 5, 6}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> This method supports dropping multiple nested fields directly e.g.</span> |
| |
| <span class="sd"> >>> df.withColumn("a", col("a").dropFields("e.g", "e.h")).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{1, 2, 3, {4}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> However, if you are going to add/replace multiple nested fields,</span> |
| <span class="sd"> it is preferred to extract out the nested struct before</span> |
| <span class="sd"> adding/replacing multiple fields e.g.</span> |
| |
| <span class="sd"> >>> df.select(col("a").withField(</span> |
| <span class="sd"> ... "e", col("a.e").dropFields("g", "h")).alias("a")</span> |
| <span class="sd"> ... ).show()</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> | a|</span> |
| <span class="sd"> +--------------+</span> |
| <span class="sd"> |{1, 2, 3, {4}}|</span> |
| <span class="sd"> +--------------+</span> |
| |
| <span class="sd"> """</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">dropFields</span><span class="p">(</span><span class="n">_to_seq</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">fieldNames</span><span class="p">))</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.__getattr__"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.__getattr__.html#pyspark.sql.Column.__getattr__">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets a field by attribute name out of a struct,</span> |
| <span class="sd"> or gets an item by key out of a dict.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> item</span> |
| <span class="sd"> a literal value.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the item retrieved by key out of a dict.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])</span> |
| <span class="sd"> >>> df.select(df.d.key).show()</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> |d[key]|</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> | value|</span> |
| <span class="sd"> +------+</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="n">item</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"__"</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkAttributeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"CANNOT_ACCESS_TO_DUNDER"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">item</span><span class="p">]</span></div> |
| |
| <div class="viewcode-block" id="Column.__getitem__"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.__getitem__.html#pyspark.sql.Column.__getitem__">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> An expression that gets an item at position ``ordinal`` out of a list,</span> |
| <span class="sd"> or gets an item by key out of a dict.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> k</span> |
| <span class="sd"> a literal value, or a slice object without step.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the item retrieved by key out of a dict, or substrings sliced by</span> |
| <span class="sd"> the given slice object.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])</span> |
| <span class="sd"> >>> df.select(df.l[slice(1, 3)], df.d['key']).show()</span> |
| <span class="sd"> +------------------+------+</span> |
| <span class="sd"> |substring(l, 1, 3)|d[key]|</span> |
| <span class="sd"> +------------------+------+</span> |
| <span class="sd"> | abc| value|</span> |
| <span class="sd"> +------------------+------+</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">slice</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">k</span><span class="o">.</span><span class="n">step</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"SLICE_WITH_STEP"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="n">k</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="n">k</span><span class="o">.</span><span class="n">stop</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"apply"</span><span class="p">)(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div> |
| |
| <span class="k">def</span><span class="w"> </span><span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_ITERABLE"</span><span class="p">,</span> <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"objectName"</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">}</span> |
| <span class="p">)</span> |
| |
| <span class="c1"># string methods</span> |
| <span class="n">_contains_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Contains the other element. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other</span> |
| <span class="s2"> string in line. A value as a literal or a :class:`Column`.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> df = spark.createDataFrame(</span> |
| <span class="s2"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="s2"> >>> df.filter(df.name.contains('o')).collect()</span> |
| <span class="s2"> [Row(age=5, name='Bob')]</span> |
| <span class="s2"> """</span> |
| <span class="n">_startswith_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> String starts with. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other : :class:`Column` or str</span> |
| <span class="s2"> string at start of line (do not use a regex `^`)</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> df = spark.createDataFrame(</span> |
| <span class="s2"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="s2"> >>> df.filter(df.name.startswith('Al')).collect()</span> |
| <span class="s2"> [Row(age=2, name='Alice')]</span> |
| <span class="s2"> >>> df.filter(df.name.startswith('^Al')).collect()</span> |
| <span class="s2"> []</span> |
| <span class="s2"> """</span> |
| <span class="n">_endswith_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> String ends with. Returns a boolean :class:`Column` based on a string match.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Parameters</span> |
| <span class="s2"> ----------</span> |
| <span class="s2"> other : :class:`Column` or str</span> |
| <span class="s2"> string at end of line (do not use a regex `$`)</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> df = spark.createDataFrame(</span> |
| <span class="s2"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="s2"> >>> df.filter(df.name.endswith('ice')).collect()</span> |
| <span class="s2"> [Row(age=2, name='Alice')]</span> |
| <span class="s2"> >>> df.filter(df.name.endswith('ice$')).collect()</span> |
| <span class="s2"> []</span> |
| <span class="s2"> """</span> |
| |
| <span class="n">contains</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"contains"</span><span class="p">,</span> <span class="n">_contains_doc</span><span class="p">)</span> |
| <span class="n">startswith</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"startsWith"</span><span class="p">,</span> <span class="n">_startswith_doc</span><span class="p">)</span> |
| <span class="n">endswith</span> <span class="o">=</span> <span class="n">_bin_op</span><span class="p">(</span><span class="s2">"endsWith"</span><span class="p">,</span> <span class="n">_endswith_doc</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="Column.like"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.like.html#pyspark.sql.Column.like">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">like</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> a SQL LIKE pattern</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.Column.rlike</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by SQL LIKE pattern.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.like('Al%')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="n">njc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"like"</span><span class="p">)(</span><span class="n">other</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">njc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.rlike"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.rlike.html#pyspark.sql.Column.rlike">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">rlike</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex</span> |
| <span class="sd"> match.</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> an extended regex expression</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by extended regex expression.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.rlike('ice$')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="n">njc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"rlike"</span><span class="p">)(</span><span class="n">other</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">njc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.ilike"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.ilike.html#pyspark.sql.Column.ilike">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">ilike</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> SQL ILIKE expression (case insensitive LIKE). Returns a boolean :class:`Column`</span> |
| <span class="sd"> based on a case insensitive match.</span> |
| |
| <span class="sd"> .. versionadded:: 3.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> other : str</span> |
| <span class="sd"> a SQL LIKE pattern</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.Column.rlike</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element</span> |
| <span class="sd"> in the Column is matched by SQL LIKE pattern.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.filter(df.name.ilike('%Ice')).collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="n">njc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"ilike"</span><span class="p">)(</span><span class="n">other</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">njc</span><span class="p">)</span></div> |
| |
| <span class="nd">@overload</span> |
| <span class="k">def</span><span class="w"> </span><span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">length</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <span class="nd">@overload</span> |
| <span class="k">def</span><span class="w"> </span><span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">length</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="o">...</span> |
| |
| <div class="viewcode-block" id="Column.substr"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.substr.html#pyspark.sql.Column.substr">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">substr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">startPos</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">"Column"</span><span class="p">],</span> <span class="n">length</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">"Column"</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return a :class:`Column` which is a substring of the column.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> startPos : :class:`Column` or int</span> |
| <span class="sd"> start position</span> |
| <span class="sd"> length : :class:`Column` or int</span> |
| <span class="sd"> length of the substring</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing a substring of each element of the original Column.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name.substr(1, 3).alias("col")).collect()</span> |
| <span class="sd"> [Row(col='Ali'), Row(col='Bob')]</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">startPos</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">type</span><span class="p">(</span><span class="n">length</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_SAME_TYPE"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span> |
| <span class="s2">"arg_name1"</span><span class="p">:</span> <span class="s2">"startPos"</span><span class="p">,</span> |
| <span class="s2">"arg_name2"</span><span class="p">:</span> <span class="s2">"length"</span><span class="p">,</span> |
| <span class="s2">"arg_type1"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">startPos</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> |
| <span class="s2">"arg_type2"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">length</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">,</span> |
| <span class="p">},</span> |
| <span class="p">)</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">startPos</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="n">startPos</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">startPos</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">substr</span><span class="p">(</span><span class="n">startPos</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="n">cast</span><span class="p">(</span><span class="s2">"Column"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span><span class="o">.</span><span class="n">_jc</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_COLUMN_OR_INT"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"startPos"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">startPos</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.isin"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.isin.html#pyspark.sql.Column.isin">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">isin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">cols</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> A boolean expression that is evaluated to true if the value of this</span> |
| <span class="sd"> expression is contained by the evaluated values of the arguments.</span> |
| |
| <span class="sd"> .. versionadded:: 1.5.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> cols</span> |
| <span class="sd"> The result will only be true at a location if any value matches in the Column.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element in the Column is contained in cols.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df[df.name.isin("Bob", "Mike")].collect()</span> |
| <span class="sd"> [Row(age=5, name='Bob')]</span> |
| <span class="sd"> >>> df[df.age.isin([1, 2, 3])].collect()</span> |
| <span class="sd"> [Row(age=2, name='Alice')]</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">set</span><span class="p">)):</span> |
| <span class="n">cols</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">Tuple</span><span class="p">,</span> <span class="n">cols</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> |
| <span class="n">cols</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Tuple</span><span class="p">,</span> |
| <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">_jc</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">Column</span><span class="p">)</span> <span class="k">else</span> <span class="n">_create_column_from_literal</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">],</span> |
| <span class="p">)</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"isin"</span><span class="p">)(</span><span class="n">_to_seq</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">cols</span><span class="p">))</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <span class="c1"># order</span> |
| <span class="n">_asc_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on the ascending order of the column.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.asc()).collect()</span> |
| <span class="s2"> [Row(name='Alice'), Row(name='Tom')]</span> |
| <span class="s2"> """</span> |
| <span class="n">_asc_nulls_first_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on ascending order of the column, and null values</span> |
| <span class="s2"> appear before non-null values.</span> |
| |
| <span class="s2"> .. versionadded:: 2.4.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()</span> |
| <span class="s2"> [Row(name=None), Row(name='Alice'), Row(name='Tom')]</span> |
| |
| <span class="s2"> """</span> |
| <span class="n">_asc_nulls_last_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on ascending order of the column, and null values</span> |
| <span class="s2"> appear after non-null values.</span> |
| |
| <span class="s2"> .. versionadded:: 2.4.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()</span> |
| <span class="s2"> [Row(name='Alice'), Row(name='Tom'), Row(name=None)]</span> |
| |
| <span class="s2"> """</span> |
| <span class="n">_desc_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on the descending order of the column.</span> |
| |
| <span class="s2"> .. versionadded:: 2.4.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.desc()).collect()</span> |
| <span class="s2"> [Row(name='Tom'), Row(name='Alice')]</span> |
| <span class="s2"> """</span> |
| <span class="n">_desc_nulls_first_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on the descending order of the column, and null values</span> |
| <span class="s2"> appear before non-null values.</span> |
| |
| <span class="s2"> .. versionadded:: 2.4.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()</span> |
| <span class="s2"> [Row(name=None), Row(name='Tom'), Row(name='Alice')]</span> |
| |
| <span class="s2"> """</span> |
| <span class="n">_desc_nulls_last_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> Returns a sort expression based on the descending order of the column, and null values</span> |
| <span class="s2"> appear after non-null values.</span> |
| |
| <span class="s2"> .. versionadded:: 2.4.0</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])</span> |
| <span class="s2"> >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()</span> |
| <span class="s2"> [Row(name='Tom'), Row(name='Alice'), Row(name=None)]</span> |
| <span class="s2"> """</span> |
| |
| <span class="n">asc</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"asc"</span><span class="p">,</span> <span class="n">_asc_doc</span><span class="p">)</span> |
| <span class="n">asc_nulls_first</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"asc_nulls_first"</span><span class="p">,</span> <span class="n">_asc_nulls_first_doc</span><span class="p">)</span> |
| <span class="n">asc_nulls_last</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"asc_nulls_last"</span><span class="p">,</span> <span class="n">_asc_nulls_last_doc</span><span class="p">)</span> |
| <span class="n">desc</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"desc"</span><span class="p">,</span> <span class="n">_desc_doc</span><span class="p">)</span> |
| <span class="n">desc_nulls_first</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"desc_nulls_first"</span><span class="p">,</span> <span class="n">_desc_nulls_first_doc</span><span class="p">)</span> |
| <span class="n">desc_nulls_last</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"desc_nulls_last"</span><span class="p">,</span> <span class="n">_desc_nulls_last_doc</span><span class="p">)</span> |
| |
| <span class="n">_isNull_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> True if the current expression is null.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)])</span> |
| <span class="s2"> >>> df.filter(df.height.isNull()).collect()</span> |
| <span class="s2"> [Row(name='Alice', height=None)]</span> |
| <span class="s2"> """</span> |
| <span class="n">_isNotNull_doc</span> <span class="o">=</span> <span class="s2">"""</span> |
| <span class="s2"> True if the current expression is NOT null.</span> |
| |
| <span class="s2"> .. versionchanged:: 3.4.0</span> |
| <span class="s2"> Supports Spark Connect.</span> |
| |
| <span class="s2"> Examples</span> |
| <span class="s2"> --------</span> |
| <span class="s2"> >>> from pyspark.sql import Row</span> |
| <span class="s2"> >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)])</span> |
| <span class="s2"> >>> df.filter(df.height.isNotNull()).collect()</span> |
| <span class="s2"> [Row(name='Tom', height=80)]</span> |
| <span class="s2"> """</span> |
| |
| <span class="n">isNull</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"isNull"</span><span class="p">,</span> <span class="n">_isNull_doc</span><span class="p">)</span> |
| <span class="n">isNotNull</span> <span class="o">=</span> <span class="n">_unary_op</span><span class="p">(</span><span class="s2">"isNotNull"</span><span class="p">,</span> <span class="n">_isNotNull_doc</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="Column.alias"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.alias.html#pyspark.sql.Column.alias">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">alias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">alias</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns this column aliased with a new name or names (in the case of expressions that</span> |
| <span class="sd"> return more than one column, such as explode).</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> alias : str</span> |
| <span class="sd"> desired column names (collects all positional arguments passed)</span> |
| |
| <span class="sd"> Other Parameters</span> |
| <span class="sd"> ----------------</span> |
| <span class="sd"> metadata: dict</span> |
| <span class="sd"> a dict of information to be stored in ``metadata`` attribute of the</span> |
| <span class="sd"> corresponding :class:`StructField <pyspark.sql.types.StructField>` (optional, keyword</span> |
| <span class="sd"> only argument)</span> |
| |
| <span class="sd"> .. versionchanged:: 2.2.0</span> |
| <span class="sd"> Added optional ``metadata`` argument.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the column(s) aliased with the new name or names.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.age.alias("age2")).collect()</span> |
| <span class="sd"> [Row(age2=2), Row(age2=5)]</span> |
| <span class="sd"> >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max']</span> |
| <span class="sd"> 99</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">metadata</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"metadata"</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <span class="k">assert</span> <span class="ow">not</span> <span class="n">kwargs</span><span class="p">,</span> <span class="s2">"Unexpected kwargs where passed: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">kwargs</span> |
| |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">get_active_spark_context</span><span class="p">()</span> |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">alias</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">metadata</span><span class="p">:</span> |
| <span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <span class="n">jmeta</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">Metadata</span><span class="o">.</span><span class="n">fromJson</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">metadata</span><span class="p">))</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"as"</span><span class="p">)(</span><span class="n">alias</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">jmeta</span><span class="p">))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"as"</span><span class="p">)(</span><span class="n">alias</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">metadata</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"ONLY_ALLOWED_FOR_SINGLE_COLUMN"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"metadata"</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="s2">"as"</span><span class="p">)(</span><span class="n">_to_seq</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="n">alias</span><span class="p">))))</span></div> |
| |
| <span class="n">name</span> <span class="o">=</span> <span class="n">copy_func</span><span class="p">(</span><span class="n">alias</span><span class="p">,</span> <span class="n">sinceversion</span><span class="o">=</span><span class="mf">2.0</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="s2">":func:`name` is an alias for :func:`alias`."</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="Column.cast"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.cast.html#pyspark.sql.Column.cast">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">cast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataType</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataType</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Casts the column into type ``dataType``.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataType : :class:`DataType` or str</span> |
| <span class="sd"> a DataType or Python string literal with a DDL-formatted string</span> |
| <span class="sd"> to use when casting the column to the new type.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the values cast into the new type.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql.types import StringType</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.age.cast("string").alias('ages')).collect()</span> |
| <span class="sd"> [Row(ages='2'), Row(ages='5')]</span> |
| <span class="sd"> >>> df.select(df.age.cast(StringType()).alias('ages')).collect()</span> |
| <span class="sd"> [Row(ages='2'), Row(ages='5')]</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dataType</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">dataType</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dataType</span><span class="p">,</span> <span class="n">DataType</span><span class="p">):</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql</span><span class="w"> </span><span class="kn">import</span> <span class="n">SparkSession</span> |
| |
| <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">_getActiveSessionOrCreate</span><span class="p">()</span> |
| <span class="n">jdt</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="o">.</span><span class="n">parseDataType</span><span class="p">(</span><span class="n">dataType</span><span class="o">.</span><span class="n">json</span><span class="p">())</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">jdt</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_DATATYPE_OR_STR"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"dataType"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">dataType</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <span class="n">astype</span> <span class="o">=</span> <span class="n">copy_func</span><span class="p">(</span><span class="n">cast</span><span class="p">,</span> <span class="n">sinceversion</span><span class="o">=</span><span class="mf">1.4</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="s2">":func:`astype` is an alias for :func:`cast`."</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="Column.between"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.between.html#pyspark.sql.Column.between">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">between</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">lowerBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">],</span> |
| <span class="n">upperBound</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Column"</span><span class="p">,</span> <span class="s2">"LiteralType"</span><span class="p">,</span> <span class="s2">"DateTimeLiteral"</span><span class="p">,</span> <span class="s2">"DecimalLiteral"</span><span class="p">],</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> True if the current column is between the lower bound and upper bound, inclusive.</span> |
| |
| <span class="sd"> .. versionadded:: 1.3.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> lowerBound : :class:`Column`, int, float, string, bool, datetime, date or Decimal</span> |
| <span class="sd"> the lower boundary of the range, inclusive.</span> |
| <span class="sd"> upperBound : :class:`Column`, int, float, string, bool, datetime, date or Decimal</span> |
| <span class="sd"> the upper boundary of the range, inclusive.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column of booleans showing whether each element of Column</span> |
| <span class="sd"> is between left and right (inclusive).</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, df.age.between(2, 4)).show()</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| <span class="sd"> | name|((age >= 2) AND (age <= 4))|</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| <span class="sd"> |Alice| true|</span> |
| <span class="sd"> | Bob| false|</span> |
| <span class="sd"> +-----+---------------------------+</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="p">(</span><span class="bp">self</span> <span class="o">>=</span> <span class="n">lowerBound</span><span class="p">)</span> <span class="o">&</span> <span class="p">(</span><span class="bp">self</span> <span class="o"><=</span> <span class="n">upperBound</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.when"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.when.html#pyspark.sql.Column.when">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">when</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">condition</span><span class="p">:</span> <span class="s2">"Column"</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates a list of conditions and returns one of multiple possible result expressions.</span> |
| <span class="sd"> If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> condition : :class:`Column`</span> |
| <span class="sd"> a boolean :class:`Column` expression.</span> |
| <span class="sd"> value</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the result of the evaluated conditions.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, sf.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show()</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| <span class="sd"> | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0 END|</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| <span class="sd"> |Alice| -1|</span> |
| <span class="sd"> | Bob| 1|</span> |
| <span class="sd"> +-----+------------------------------------------------------------+</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.functions.when</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">condition</span><span class="p">,</span> <span class="n">Column</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_COLUMN"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"condition"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">condition</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="n">v</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">_jc</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">Column</span><span class="p">)</span> <span class="k">else</span> <span class="n">value</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">condition</span><span class="o">.</span><span class="n">_jc</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.otherwise"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.otherwise.html#pyspark.sql.Column.otherwise">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates a list of conditions and returns one of multiple possible result expressions.</span> |
| <span class="sd"> If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> value</span> |
| <span class="sd"> a literal value, or a :class:`Column` expression.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| <span class="sd"> Column representing the value used for rows where no condition matched.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import functions as sf</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.select(df.name, sf.when(df.age > 3, 1).otherwise(0)).show()</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> | name|CASE WHEN (age > 3) THEN 1 ELSE 0 END|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| <span class="sd"> |Alice| 0|</span> |
| <span class="sd"> | Bob| 1|</span> |
| <span class="sd"> +-----+-------------------------------------+</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> pyspark.sql.functions.when</span> |
| <span class="sd"> """</span> |
| <span class="n">v</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">_jc</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">Column</span><span class="p">)</span> <span class="k">else</span> <span class="n">value</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="Column.over"><a class="viewcode-back" href="../../../reference/pyspark.sql/api/pyspark.sql.Column.over.html#pyspark.sql.Column.over">[docs]</a> <span class="k">def</span><span class="w"> </span><span class="nf">over</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window</span><span class="p">:</span> <span class="s2">"WindowSpec"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Column"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Define a windowing column.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> .. versionchanged:: 3.4.0</span> |
| <span class="sd"> Supports Spark Connect.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> window : :class:`WindowSpec`</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> :class:`Column`</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.sql import Window</span> |
| <span class="sd"> >>> window = (</span> |
| <span class="sd"> ... Window.partitionBy("name")</span> |
| <span class="sd"> ... .orderBy("age")</span> |
| <span class="sd"> ... .rowsBetween(Window.unboundedPreceding, Window.currentRow)</span> |
| <span class="sd"> ... )</span> |
| <span class="sd"> >>> from pyspark.sql.functions import rank, min, desc</span> |
| <span class="sd"> >>> df = spark.createDataFrame(</span> |
| <span class="sd"> ... [(2, "Alice"), (5, "Bob")], ["age", "name"])</span> |
| <span class="sd"> >>> df.withColumn(</span> |
| <span class="sd"> ... "rank", rank().over(window)</span> |
| <span class="sd"> ... ).withColumn(</span> |
| <span class="sd"> ... "min", min('age').over(window)</span> |
| <span class="sd"> ... ).sort(desc("age")).show()</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> |age| name|rank|min|</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> | 5| Bob| 1| 5|</span> |
| <span class="sd"> | 2|Alice| 1| 2|</span> |
| <span class="sd"> +---+-----+----+---+</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql.window</span><span class="w"> </span><span class="kn">import</span> <span class="n">WindowSpec</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">WindowSpec</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="n">PySparkTypeError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"NOT_WINDOWSPEC"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">"arg_name"</span><span class="p">:</span> <span class="s2">"window"</span><span class="p">,</span> <span class="s2">"arg_type"</span><span class="p">:</span> <span class="nb">type</span><span class="p">(</span><span class="n">window</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">},</span> |
| <span class="p">)</span> |
| <span class="n">jc</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="o">.</span><span class="n">_jspec</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">Column</span><span class="p">(</span><span class="n">jc</span><span class="p">)</span></div> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">__nonzero__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">PySparkValueError</span><span class="p">(</span> |
| <span class="n">error_class</span><span class="o">=</span><span class="s2">"CANNOT_CONVERT_COLUMN_INTO_BOOL"</span><span class="p">,</span> |
| <span class="n">message_parameters</span><span class="o">=</span><span class="p">{},</span> |
| <span class="p">)</span> |
| |
| <span class="fm">__bool__</span> <span class="o">=</span> <span class="n">__nonzero__</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="k">return</span> <span class="s2">"Column<'</span><span class="si">%s</span><span class="s2">'>"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jc</span><span class="o">.</span><span class="n">toString</span><span class="p">()</span></div> |
| |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">_test</span><span class="p">()</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">doctest</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">pyspark.sql</span><span class="w"> </span><span class="kn">import</span> <span class="n">SparkSession</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">pyspark.sql.column</span> |
| |
| <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> |
| <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[4]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"sql.column tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"spark"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span> |
| |
| <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span> |
| <span class="n">pyspark</span><span class="o">.</span><span class="n">sql</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> |
| <span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> |
| <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">REPORT_NDIFF</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> |
| <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> |
| <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| |
| |
| <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> |
| <span class="n">_test</span><span class="p">()</span> |
| </pre></div> |
| |
| </div> |
| |
| |
| <!-- Previous / next buttons --> |
| <div class="prev-next-area">
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p class="copyright"> |
| &copy; Copyright.<br>
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
| </p> |
| </div> |
| |
| </div> |
| </footer> |
| </body> |
| </html> |