| |
| |
| <!DOCTYPE html> |
| |
| |
| <!-- lang lets screen readers and translation tools pick the right language; the page content is English --> |
| <html lang="en"> |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| <title>pyspark.pandas.base — PySpark 4.0.0-preview2 documentation</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| // Copy the saved colour mode and theme from localStorage onto <html> as data attributes. |
| // Defaults: empty mode, "light" theme. This script sits ahead of the stylesheet links below. |
| (function (root, store) { |
| root.dataset.mode = store.getItem("mode") || ""; |
| root.dataset.theme = store.getItem("theme") || "light"; |
| })(document.documentElement, localStorage); |
| </script> |
| |
| <!-- Loaded before other Sphinx assets --> |
| <link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" /> |
| <link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" /> |
| <link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" /> |
| |
| |
| <link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" /> |
| <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" /> |
| |
| <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" /> |
| |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" /> |
| <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" /> |
| |
| <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script> |
| <script src="../../../_static/jquery.js"></script> |
| <script src="../../../_static/underscore.js"></script> |
| <script src="../../../_static/doctools.js"></script> |
| <script src="../../../_static/clipboard.min.js"></script> |
| <script src="../../../_static/copybutton.js"></script> |
| <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/pandas/base';</script> |
| <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/pandas/base.html" /> |
| <link rel="search" title="Search" href="../../../search.html" /> |
| <!-- The viewport is declared once, next to the charset meta at the top of <head>; the duplicate that used to sit here was removed. --> |
| <meta name="docsearch:language" content="None"> |
| |
| |
| <!-- Matomo analytics: queues tracker commands on window._paq (cookies disabled, page view, link tracking). --> |
| <!-- It then inserts an async matomo.js script from https://analytics.apache.org/ and reports to matomo.php as site id 40. --> |
| <script type="text/javascript"> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| _paq.push(["disableCookies"]); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '40']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End of Matomo analytics snippet --> |
| |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <a class="skip-link" href="#main-content">Skip to main content</a> |
| |
| <input type="checkbox" |
| class="sidebar-toggle" |
| name="__primary" |
| id="__primary"/> |
| <label class="overlay overlay-primary" for="__primary"></label> |
| |
| <input type="checkbox" |
| class="sidebar-toggle" |
| name="__secondary" |
| id="__secondary"/> |
| <label class="overlay overlay-secondary" for="__secondary"></label> |
| |
| <div class="search-button__wrapper"> |
| <div class="search-button__overlay"></div> |
| <div class="search-button__search-container"> |
| <form class="bd-search d-flex align-items-center" |
| action="../../../search.html" |
| method="get"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input type="search" |
| class="form-control" |
| name="q" |
| id="search-input" |
| placeholder="Search the docs ..." |
| aria-label="Search the docs ..." |
| autocomplete="off" |
| autocorrect="off" |
| autocapitalize="off" |
| spellcheck="false"/> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form></div> |
| </div> |
| |
| <nav class="bd-header navbar navbar-expand-lg bd-navbar"> |
| <div class="bd-header__inner bd-page-width"> |
| <label class="sidebar-toggle primary-toggle" for="__primary"> |
| <span class="fa-solid fa-bars"></span> |
| </label> |
| |
| <div class="navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| |
| |
| <a class="navbar-brand logo" href="../../../index.html"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <img src="https://spark.apache.org/images/spark-logo.png" class="logo__image only-light" alt="Logo image"/> |
| <script>document.write(`<img src="https://spark.apache.org/images/spark-logo-rev.svg" class="logo__image only-dark" alt="Logo image"/>`);</script> |
| |
| |
| </a></div> |
| |
| </div> |
| |
| |
| <div class="col-lg-9 navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"><nav class="navbar-nav"> |
| <p class="sidebar-header-items__title" |
| role="heading" |
| aria-level="1" |
| aria-label="Site Navigation"> |
| Site Navigation |
| </p> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../index.html"> |
| Overview |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../getting_started/index.html"> |
| Getting Started |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../user_guide/index.html"> |
| User Guides |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../reference/index.html"> |
| API Reference |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../development/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> |
| Migration Guides |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| <!-- Header search button. It is written with document.write, so it only exists in the page when JavaScript runs. --> |
| <script> |
| document.write(` |
| <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| <div class="navbar-item"><!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> |
| 4.0.0-preview2 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Build the docs-homepage URL for one entry of versions.json. |
| // `entry.version` is substituted for the "{version}" placeholder; the resulting URL is returned. |
| function buildURL(entry) { |
| const homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja |
| return homepageTemplate.replace("{version}", entry.version); |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version. |
| // |
| // `event` is the click event of a version-switcher link whose href is the homepage of |
| // the other docs version. The function always returns false so the browser does not |
| // follow the link itself; navigation happens in the AJAX callbacks below. |
| function checkPageExistsAndRedirect(event) { |
| const currentFilePath = "_modules/pyspark/pandas/base.html"; |
| // currentTarget is the <a> the handler is bound to; fall back to target for callers that only set that. |
| const link = event.currentTarget || event.target; |
| const otherDocsHomepage = link.getAttribute("href"); |
| // The homepage URL ends in "index.html". Drop that file name so the page path is |
| // appended to the docs root instead of producing ".../index.html_modules/...". |
| const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""); |
| const tryUrl = `${otherDocsRoot}${currentFilePath}`; |
| $.ajax({ |
| type: 'HEAD', |
| url: tryUrl, |
| // if the page exists, go there |
| success: function() { |
| location.href = tryUrl; |
| } |
| }).fail(function() { |
| // page is missing (or the request failed): fall back to the homepage of that version |
| location.href = otherDocsHomepage; |
| }); |
| return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
| // Resolve the dropdown that belongs to this <script> while the script is still executing. |
| // The switcher markup occurs more than once on this page with the same id, and a |
| // document-wide id lookup only ever returns the first copy. |
| const ownScript = document.currentScript; |
| const scope = ownScript && ownScript.parentElement; |
| const switcher = (scope && scope.querySelector("#version_switcher")) || document.getElementById("version_switcher"); |
| if (!switcher) { |
| // nothing to populate |
| return; |
| } |
| // get JSON config |
| $.getJSON("https://spark.apache.org/static/versions.json", function(data) { |
| // create the nodes first (before AJAX calls) to ensure the order is |
| // correct (for now, links will go to doc version homepage) |
| $.each(data, function(index, entry) { |
| // if no custom name specified (e.g., "latest"), use version string |
| if (!("name" in entry)) { |
| entry.name = entry.version; |
| } |
| // construct the appropriate URL, and add it to the dropdown |
| entry.url = buildURL(entry); |
| const node = document.createElement("a"); |
| node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
| node.setAttribute("href", `${entry.url}`); |
| node.textContent = `${entry.name}`; |
| node.onclick = checkPageExistsAndRedirect; |
| switcher.appendChild(node); |
| }); |
| }); |
| })(); |
| </script></div> |
| |
| <!-- Light/dark/auto theme switch button. It is written with document.write, so it only exists when JavaScript runs. --> |
| <div class="navbar-item"> |
| <script> |
| document.write(` |
| <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> |
| <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> |
| <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <!-- External project links. The visually hidden name is a <span>: a <label> is interactive content, |
| which is not allowed inside <a>, and it had no form control to label. Icons are decorative. --> |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span> |
| <span class="sr-only">PyPI</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| <!-- Search button for the navbar-persistent--mobile container. It is written with document.write, so it only exists when JavaScript runs. --> |
| <script> |
| document.write(` |
| <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| |
| </div> |
| |
| </nav> |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| <div class="bd-sidebar-primary bd-sidebar hide-on-wide"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| <div class="navbar-item"><nav class="navbar-nav"> |
| <p class="sidebar-header-items__title" |
| role="heading" |
| aria-level="1" |
| aria-label="Site Navigation"> |
| Site Navigation |
| </p> |
| <ul class="bd-navbar-elements navbar-nav"> |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../index.html"> |
| Overview |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../getting_started/index.html"> |
| Getting Started |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../user_guide/index.html"> |
| User Guides |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../reference/index.html"> |
| API Reference |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../development/index.html"> |
| Development |
| </a> |
| </li> |
| |
| |
| <li class="nav-item"> |
| <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> |
| Migration Guides |
| </a> |
| </li> |
| |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> |
| 4.0.0-preview2 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Build the docs-homepage URL for one entry of versions.json. |
| // `entry.version` is substituted for the "{version}" placeholder; the resulting URL is returned. |
| function buildURL(entry) { |
| const homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja |
| return homepageTemplate.replace("{version}", entry.version); |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version. |
| // |
| // `event` is the click event of a version-switcher link whose href is the homepage of |
| // the other docs version. The function always returns false so the browser does not |
| // follow the link itself; navigation happens in the AJAX callbacks below. |
| function checkPageExistsAndRedirect(event) { |
| const currentFilePath = "_modules/pyspark/pandas/base.html"; |
| // currentTarget is the <a> the handler is bound to; fall back to target for callers that only set that. |
| const link = event.currentTarget || event.target; |
| const otherDocsHomepage = link.getAttribute("href"); |
| // The homepage URL ends in "index.html". Drop that file name so the page path is |
| // appended to the docs root instead of producing ".../index.html_modules/...". |
| const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""); |
| const tryUrl = `${otherDocsRoot}${currentFilePath}`; |
| $.ajax({ |
| type: 'HEAD', |
| url: tryUrl, |
| // if the page exists, go there |
| success: function() { |
| location.href = tryUrl; |
| } |
| }).fail(function() { |
| // page is missing (or the request failed): fall back to the homepage of that version |
| location.href = otherDocsHomepage; |
| }); |
| return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
| // Resolve the dropdown that belongs to this <script> while the script is still executing. |
| // The switcher markup occurs more than once on this page with the same id, and a |
| // document-wide id lookup only ever returns the first copy. |
| const ownScript = document.currentScript; |
| const scope = ownScript && ownScript.parentElement; |
| const switcher = (scope && scope.querySelector("#version_switcher")) || document.getElementById("version_switcher"); |
| if (!switcher) { |
| // nothing to populate |
| return; |
| } |
| // get JSON config |
| $.getJSON("https://spark.apache.org/static/versions.json", function(data) { |
| // create the nodes first (before AJAX calls) to ensure the order is |
| // correct (for now, links will go to doc version homepage) |
| $.each(data, function(index, entry) { |
| // if no custom name specified (e.g., "latest"), use version string |
| if (!("name" in entry)) { |
| entry.name = entry.version; |
| } |
| // construct the appropriate URL, and add it to the dropdown |
| entry.url = buildURL(entry); |
| const node = document.createElement("a"); |
| node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
| node.setAttribute("href", `${entry.url}`); |
| node.textContent = `${entry.name}`; |
| node.onclick = checkPageExistsAndRedirect; |
| switcher.appendChild(node); |
| }); |
| }); |
| })(); |
| </script></div> |
| |
| <!-- Light/dark/auto theme switch button. It is written with document.write, so it only exists when JavaScript runs. --> |
| <div class="navbar-item"> |
| <script> |
| document.write(` |
| <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> |
| <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> |
| <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <!-- External project links. The visually hidden name is a <span>: a <label> is interactive content, |
| which is not allowed inside <a>, and it had no form control to label. Icons are decorative. --> |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span> |
| <span class="sr-only">PyPI</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| </div> |
| |
| <div id="rtd-footer-container"></div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| |
| |
| <!-- Breadcrumb trail. The <nav> is the only navigation landmark here; the <ul> keeps its native list |
| semantics so assistive technology can announce the number of items. --> |
| <nav aria-label="Breadcrumbs"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../../../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home" aria-hidden="true"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page">pyspark.pandas.base</li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article" role="main"> |
| |
| <h1>Source code for pyspark.pandas.base</h1><div class="highlight"><pre> |
| <span></span><span class="c1">#</span> |
| <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> |
| <span class="c1"># contributor license agreements. See the NOTICE file distributed with</span> |
| <span class="c1"># this work for additional information regarding copyright ownership.</span> |
| <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> |
| <span class="c1"># (the "License"); you may not use this file except in compliance with</span> |
| <span class="c1"># the License. You may obtain a copy of the License at</span> |
| <span class="c1">#</span> |
| <span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="c1">#</span> |
| <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> |
| <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="c1"># See the License for the specific language governing permissions and</span> |
| <span class="c1"># limitations under the License.</span> |
| <span class="c1">#</span> |
| |
| <span class="sd">"""</span> |
| <span class="sd">Base and utility classes for pandas-on-Spark objects.</span> |
| <span class="sd">"""</span> |
| <span class="kn">import</span> <span class="nn">warnings</span> |
| <span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span> |
| <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">wraps</span><span class="p">,</span> <span class="n">partial</span> |
| <span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">chain</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">cast</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> |
| |
| <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> |
| <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> |
| <span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="n">is_list_like</span><span class="p">,</span> <span class="n">CategoricalDtype</span> <span class="c1"># type: ignore[attr-defined]</span> |
| |
| <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span><span class="p">,</span> <span class="n">Column</span><span class="p">,</span> <span class="n">Window</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="n">LongType</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">,</span> <span class="n">NumericType</span> |
| <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span> <span class="c1"># For running doctests and reference resolution in PyCharm.</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">Dtype</span><span class="p">,</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">Label</span><span class="p">,</span> <span class="n">SeriesOrIndex</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.config</span> <span class="kn">import</span> <span class="n">get_option</span><span class="p">,</span> <span class="n">option_context</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">InternalField</span><span class="p">,</span> |
| <span class="n">InternalFrame</span><span class="p">,</span> |
| <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> |
| <span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.spark.accessors</span> <span class="kn">import</span> <span class="n">SparkIndexOpsMethods</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.typedef</span> <span class="kn">import</span> <span class="n">extension_dtypes</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">combine_frames</span><span class="p">,</span> |
| <span class="n">same_anchor</span><span class="p">,</span> |
| <span class="n">scol_for</span><span class="p">,</span> |
| <span class="n">validate_axis</span><span class="p">,</span> |
| <span class="n">ERROR_MESSAGE_CANNOT_COMBINE</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span> |
| |
| <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql._typing</span> <span class="kn">import</span> <span class="n">ColumnOrName</span> |
| |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.data_type_ops.base</span> <span class="kn">import</span> <span class="n">DataTypeOps</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span> |
| |
| |
| <span class="k">def</span> <span class="nf">should_alignment_for_column_op</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">SeriesOrIndex</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">SeriesOrIndex</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span> |
| <span class="k">return</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span> <span class="ow">is</span> <span class="ow">not</span> <span class="n">other</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span> |
| |
| |
| <span class="k">def</span> <span class="nf">align_diff_index_ops</span><span class="p">(</span> |
| <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Column</span><span class="p">],</span> <span class="n">this_index_ops</span><span class="p">:</span> <span class="n">SeriesOrIndex</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Align the `IndexOpsMixin` objects and apply the function.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> func : The function to apply</span> |
| <span class="sd"> this_index_ops : IndexOpsMixin</span> |
| <span class="sd"> A base `IndexOpsMixin` object</span> |
| <span class="sd"> args : list of other arguments including other `IndexOpsMixin` objects</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> `Index` if all `this_index_ops` and arguments are `Index`; otherwise `Series`</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">Index</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span><span class="p">,</span> <span class="n">first_series</span> |
| |
| <span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">arg</span> <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">IndexOpsMixin</span><span class="p">)]</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">this_index_ops</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">all</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">):</span> |
| <span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span> |
| <span class="n">this_index_ops</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> |
| <span class="o">*</span><span class="p">[</span><span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">cols</span><span class="p">)],</span> |
| <span class="n">how</span><span class="o">=</span><span class="s2">"full"</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">column_op</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span> |
| <span class="n">combined</span><span class="p">[</span><span class="s2">"this"</span><span class="p">]</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">combined</span><span class="p">[</span><span class="s2">"this"</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span> |
| <span class="o">*</span><span class="p">[</span> |
| <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span> |
| <span class="p">],</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">this_index_ops</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="c1"># This could cause as many counts, reset_index calls, joins for combining</span> |
| <span class="c1"># as the number of `Index`s in `args`. So far it's fine since we can assume the ops</span> |
| <span class="c1"># only work between at most two `Index`s. We might need to fix it in the future.</span> |
| |
| <span class="n">self_len</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">this_index_ops</span><span class="p">)</span> |
| <span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">!=</span> <span class="n">self_len</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">IndexOpsMixin</span><span class="p">)):</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"operands could not be broadcast together with shapes"</span><span class="p">)</span> |
| |
| <span class="k">with</span> <span class="n">option_context</span><span class="p">(</span><span class="s2">"compute.default_index_type"</span><span class="p">,</span> <span class="s2">"distributed-sequence"</span><span class="p">):</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">this_index_ops</span><span class="p">,</span> <span class="n">Index</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">all</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Index</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">Index</span><span class="p">(</span> |
| <span class="n">column_op</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span> |
| <span class="n">this_index_ops</span><span class="o">.</span><span class="n">to_series</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <span class="o">*</span><span class="p">[</span> |
| <span class="n">arg</span><span class="o">.</span><span class="n">to_series</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">Index</span><span class="p">)</span> |
| <span class="k">else</span> <span class="n">arg</span> |
| <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span> |
| <span class="p">],</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(),</span> |
| <span class="n">name</span><span class="o">=</span><span class="n">this_index_ops</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">this_index_ops</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span> |
| <span class="n">this</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">this_index_ops</span><span class="o">.</span><span class="n">reset_index</span><span class="p">())</span> |
| <span class="n">that</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">col</span><span class="o">.</span><span class="n">to_series</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Index</span><span class="p">)</span> <span class="k">else</span> <span class="n">col</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> |
| <span class="p">]</span> |
| |
| <span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="n">this</span><span class="p">,</span> <span class="o">*</span><span class="n">that</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">"full"</span><span class="p">)</span><span class="o">.</span><span class="n">sort_index</span><span class="p">()</span> |
| <span class="n">combined</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span> |
| <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[:</span> <span class="n">this_index_ops</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">]</span> |
| <span class="p">)</span> |
| <span class="n">combined</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">names</span> <span class="o">=</span> <span class="n">this_index_ops</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span> |
| |
| <span class="k">return</span> <span class="n">column_op</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span> |
| <span class="n">first_series</span><span class="p">(</span><span class="n">combined</span><span class="p">[</span><span class="s2">"this"</span><span class="p">]),</span> |
| <span class="o">*</span><span class="p">[</span> |
| <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span> |
| <span class="p">],</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">this_index_ops</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">this</span> <span class="o">=</span> <span class="n">this_index_ops</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| |
| <span class="n">that_series</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">col</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Series</span><span class="p">))</span> |
| <span class="n">that_frame</span> <span class="o">=</span> <span class="n">that_series</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[</span> |
| <span class="p">[</span> |
| <span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">col</span><span class="o">.</span><span class="n">to_series</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Index</span><span class="p">)</span> <span class="k">else</span> <span class="n">col</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">cols</span><span class="p">)</span> |
| <span class="p">]</span> |
| <span class="p">]</span> |
| |
| <span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="n">this</span><span class="p">,</span> <span class="n">that_frame</span><span class="o">.</span><span class="n">reset_index</span><span class="p">())</span><span class="o">.</span><span class="n">sort_index</span><span class="p">()</span> |
| |
| <span class="n">self_index</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">combined</span><span class="p">[</span><span class="s2">"this"</span><span class="p">]</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="n">combined</span><span class="p">[</span><span class="s2">"this"</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">)</span><span class="o">.</span><span class="n">index</span> |
| <span class="p">)</span> |
| |
| <span class="n">other</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span> |
| <span class="n">combined</span><span class="p">[</span><span class="s2">"that"</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[:</span> <span class="n">that_series</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">]</span> |
| <span class="p">)</span> |
| <span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">names</span> <span class="o">=</span> <span class="n">that_series</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span> |
| |
| <span class="k">return</span> <span class="n">column_op</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span> |
| <span class="n">self_index</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">[</span> |
| <span class="n">other</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">label</span><span class="p">,</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span> |
| <span class="p">],</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">that_series</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> |
| |
| |
| <span class="k">def</span> <span class="nf">booleanize_null</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">,</span> <span class="n">f</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Booleanize Null in Spark Column</span> |
| <span class="sd"> """</span> |
| <span class="n">comp_ops</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="nb">getattr</span><span class="p">(</span><span class="n">Column</span><span class="p">,</span> <span class="s2">"__</span><span class="si">{}</span><span class="s2">__"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">comp_op</span><span class="p">))</span> |
| <span class="k">for</span> <span class="n">comp_op</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">"eq"</span><span class="p">,</span> <span class="s2">"ne"</span><span class="p">,</span> <span class="s2">"lt"</span><span class="p">,</span> <span class="s2">"le"</span><span class="p">,</span> <span class="s2">"ge"</span><span class="p">,</span> <span class="s2">"gt"</span><span class="p">]</span> |
| <span class="p">]</span> |
| |
| <span class="k">if</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">comp_ops</span><span class="p">:</span> |
| <span class="c1"># if `f` is "!=", fill null with True otherwise False</span> |
| <span class="n">filler</span> <span class="o">=</span> <span class="n">f</span> <span class="o">==</span> <span class="n">Column</span><span class="o">.</span><span class="fm">__ne__</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">filler</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">scol</span> |
| |
| |
| <span class="k">def</span> <span class="nf">column_op</span><span class="p">(</span><span class="n">f</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">SeriesOrIndex</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> A decorator that wraps APIs taking/returning Spark Column so that pandas-on-Spark Series can be</span> |
| <span class="sd"> supported too. If this decorator is used for the `f` function that takes Spark Column and</span> |
| <span class="sd"> returns Spark Column, decorated `f` takes pandas-on-Spark Series as well and returns</span> |
| <span class="sd"> pandas-on-Spark Series.</span> |
| |
| <span class="sd"> :param f: a function that takes Spark Column and returns Spark Column.</span> |
| <span class="sd"> :param self: pandas-on-Spark Series</span> |
| <span class="sd"> :param args: arguments that the function `f` takes.</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@wraps</span><span class="p">(</span><span class="n">f</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">wrapper</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">SeriesOrIndex</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes.base</span> <span class="kn">import</span> <span class="n">Index</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span> |
| |
| <span class="c1"># It is possible for the function `f` to take other arguments than Spark Column.</span> |
| <span class="c1"># To cover this case, explicitly check if the argument is pandas-on-Spark Series and</span> |
| <span class="c1"># extract Spark Column. For other arguments, they are used as are.</span> |
| <span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">arg</span> <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">Index</span><span class="p">))]</span> |
| |
| <span class="k">if</span> <span class="nb">all</span><span class="p">(</span><span class="ow">not</span> <span class="n">should_alignment_for_column_op</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">):</span> |
| <span class="c1"># Same DataFrame anchors</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">[</span><span class="n">arg</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">IndexOpsMixin</span><span class="p">)</span> <span class="k">else</span> <span class="n">arg</span> <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span><span class="p">],</span> |
| <span class="p">)</span> |
| |
| <span class="n">field</span> <span class="o">=</span> <span class="n">InternalField</span><span class="o">.</span><span class="n">from_struct_field</span><span class="p">(</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">schema</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> |
| <span class="n">use_extension_dtypes</span><span class="o">=</span><span class="nb">any</span><span class="p">(</span> |
| <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">extension_dtypes</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="p">[</span><span class="bp">self</span><span class="p">]</span> <span class="o">+</span> <span class="n">cols</span> |
| <span class="p">),</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">field</span><span class="o">.</span><span class="n">is_extension_dtype</span><span class="p">:</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">booleanize_null</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">f</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">any</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">):</span> |
| <span class="n">index_ops</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">psser</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">col</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">Series</span><span class="p">))</span> |
| <span class="n">index_ops</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">)</span> |
| <span class="k">elif</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">"compute.ops_on_diff_frames"</span><span class="p">):</span> |
| <span class="n">index_ops</span> <span class="o">=</span> <span class="n">align_diff_index_ops</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">ERROR_MESSAGE_CANNOT_COMBINE</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="n">col</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">):</span> |
| <span class="n">index_ops</span> <span class="o">=</span> <span class="n">index_ops</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">index_ops</span> |
| |
| <span class="k">return</span> <span class="n">wrapper</span> |
| |
| |
| <span class="k">def</span> <span class="nf">numpy_column_op</span><span class="p">(</span><span class="n">f</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">SeriesOrIndex</span><span class="p">]:</span> |
| <span class="nd">@wraps</span><span class="p">(</span><span class="n">f</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">wrapper</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">SeriesOrIndex</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="c1"># PySpark does not support NumPy type out of the box. For now, we convert NumPy types</span> |
| <span class="c1"># into some primitive types understandable in PySpark.</span> |
| <span class="n">new_args</span> <span class="o">=</span> <span class="p">[]</span> |
| <span class="k">for</span> <span class="n">arg</span> <span class="ow">in</span> <span class="n">args</span><span class="p">:</span> |
| <span class="c1"># TODO: This is a quick hack to support NumPy type. We should revisit this.</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">LongType</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">timedelta64</span><span class="p">):</span> |
| <span class="n">new_args</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">arg</span> <span class="o">/</span> <span class="n">np</span><span class="o">.</span><span class="n">timedelta64</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"s"</span><span class="p">)))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">new_args</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arg</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">column_op</span><span class="p">(</span><span class="n">f</span><span class="p">)(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">new_args</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">wrapper</span> |
| |
| |
| <span class="k">class</span> <span class="nc">IndexOpsMixin</span><span class="p">(</span><span class="nb">object</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""common ops mixin to support a unified interface / docs for Series / Index</span> |
| |
| <span class="sd"> Assuming there are following attributes or properties and functions.</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">_internal</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">InternalFrame</span><span class="p">:</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">_psdf</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">_with_new_scol</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">InternalField</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">_column_label</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">Label</span><span class="p">]:</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">spark</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">SparkIndexOpsMethods</span><span class="p">[</span><span class="n">IndexOpsLike</span><span class="p">]:</span> |
| <span class="k">pass</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">_dtype_op</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DataTypeOps"</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.data_type_ops.base</span> <span class="kn">import</span> <span class="n">DataTypeOps</span> |
| |
| <span class="k">return</span> <span class="n">DataTypeOps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">)</span> |
| |
| <span class="nd">@abstractmethod</span> |
| <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">pass</span> |
| |
| <span class="c1"># arithmetic operators</span> |
| <span class="k">def</span> <span class="fm">__neg__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">neg</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__sub__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__mul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__truediv__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __truediv__ has different behaviour between pandas and PySpark for several cases.</span> |
| <span class="sd"> 1. When dividing np.inf by zero, PySpark returns null whereas pandas returns np.inf</span> |
| <span class="sd"> 2. When dividing a positive number by zero, PySpark returns null</span> |
| <span class="sd"> whereas pandas returns np.inf</span> |
| <span class="sd"> 3. When dividing -np.inf by zero, PySpark returns null whereas pandas returns -np.inf</span> |
| <span class="sd"> 4. When dividing a negative number by zero, PySpark returns null whereas pandas returns -np.inf</span> |
| |
| <span class="sd"> +-------------------------------------------+</span> |
| <span class="sd"> | dividend (divisor: 0) | PySpark | pandas |</span> |
| <span class="sd"> |-----------------------|---------|---------|</span> |
| <span class="sd"> | np.inf | null | np.inf |</span> |
| <span class="sd"> | -np.inf | null | -np.inf |</span> |
| <span class="sd"> | 10 | null | np.inf |</span> |
| <span class="sd"> | -10 | null | -np.inf |</span> |
| <span class="sd"> +-------------------------------------------+</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">truediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__mod__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">mod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__radd__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">radd</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rsub__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rsub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rmul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rmul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rtruediv__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rtruediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__floordiv__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __floordiv__ has different behaviour between pandas and PySpark for several cases.</span> |
| <span class="sd"> 1. When dividing np.inf by zero, PySpark returns null whereas pandas returns np.inf</span> |
| <span class="sd"> 2. When dividing a positive number by zero, PySpark returns null</span> |
| <span class="sd"> whereas pandas returns np.inf</span> |
| <span class="sd"> 3. When dividing -np.inf by zero, PySpark returns null whereas pandas returns -np.inf</span> |
| <span class="sd"> 4. When dividing a negative number by zero, PySpark returns null whereas pandas returns -np.inf</span> |
| |
| <span class="sd"> +-------------------------------------------+</span> |
| <span class="sd"> | dividend (divisor: 0) | PySpark | pandas |</span> |
| <span class="sd"> |-----------------------|---------|---------|</span> |
| <span class="sd"> | np.inf | null | np.inf |</span> |
| <span class="sd"> | -np.inf | null | -np.inf |</span> |
| <span class="sd"> | 10 | null | np.inf |</span> |
| <span class="sd"> | -10 | null | -np.inf |</span> |
| <span class="sd"> +-------------------------------------------+</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">floordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rfloordiv__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rfloordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rmod__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__pow__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">pow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rpow__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rpow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__abs__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> |
| |
| <span class="c1"># comparison operators</span> |
| <span class="k">def</span> <span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> <span class="c1"># type: ignore[override]</span> |
| <span class="c1"># pandas always returns False for all items with dict and set.</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="p">(</span><span class="nb">dict</span><span class="p">,</span> <span class="nb">set</span><span class="p">)):</span> |
| <span class="k">return</span> <span class="bp">self</span> <span class="o">!=</span> <span class="bp">self</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__ne__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> <span class="c1"># type: ignore[override]</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__lt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">lt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__le__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__ge__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">ge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__gt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__invert__</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">invert</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> |
| |
| <span class="c1"># `and`, `or`, `not` cannot be overloaded in Python,</span> |
| <span class="c1"># so use bitwise operators as boolean operators</span> |
| <span class="k">def</span> <span class="fm">__and__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="fm">__and__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__or__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="fm">__or__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rand__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__ror__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">ror</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__xor__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">xor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__rxor__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">rxor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">)</span> |
| |
| <span class="c1"># NDArray Compat</span> |
| <span class="k">def</span> <span class="nf">__array_ufunc__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">ufunc</span><span class="p">:</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">method</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="n">inputs</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">SeriesOrIndex</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas</span> <span class="kn">import</span> <span class="n">numpy_compat</span> |
| |
| <span class="c1"># Try dunder methods first.</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">numpy_compat</span><span class="o">.</span><span class="n">maybe_dispatch_ufunc_to_dunder_op</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">ufunc</span><span class="p">,</span> <span class="n">method</span><span class="p">,</span> <span class="o">*</span><span class="n">inputs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span> |
| <span class="p">)</span> |
| |
| <span class="c1"># After that, we try with PySpark APIs.</span> |
| <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="bp">NotImplemented</span><span class="p">:</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">numpy_compat</span><span class="o">.</span><span class="n">maybe_dispatch_ufunc_to_spark_func</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> <span class="n">ufunc</span><span class="p">,</span> <span class="n">method</span><span class="p">,</span> <span class="o">*</span><span class="n">inputs</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">NotImplemented</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">SeriesOrIndex</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="c1"># TODO: support more APIs?</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span> |
| <span class="s2">"pandas-on-Spark objects currently do not support </span><span class="si">%s</span><span class="s2">."</span> <span class="o">%</span> <span class="n">ufunc</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">dtype</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dtype</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""Return the dtype object of the underlying data.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> s = ps.Series([1, 2, 3])</span> |
| <span class="sd"> >>> s.dtype</span> |
| <span class="sd"> dtype('int64')</span> |
| |
| <span class="sd"> >>> s = ps.Series(list('abc'))</span> |
| <span class="sd"> >>> s.dtype</span> |
| <span class="sd"> dtype('O')</span> |
| |
| <span class="sd"> >>> s = ps.Series(pd.date_range('20130101', periods=3))</span> |
| <span class="sd"> >>> s.dtype</span> |
| <span class="sd"> dtype('&lt;M8[ns]')</span> |
| |
| <span class="sd"> >>> s.rename("a").to_frame().set_index("a").index.dtype</span> |
| <span class="sd"> dtype('&lt;M8[ns]')</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">dtype</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">empty</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return True if the current object is empty; otherwise, return False.</span> |
| |
| <span class="sd"> >>> ps.range(10).id.empty</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.range(0).id.empty</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.DataFrame({}, index=list('abc')).index.empty</span> |
| <span class="sd"> False</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">isEmpty</span><span class="p">()</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">hasnans</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return True if it has any missing values. Otherwise, it returns False.</span> |
| |
| <span class="sd"> >>> ps.DataFrame({}, index=list('abc')).index.hasnans</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series(['a', None]).hasnans</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([1.0, 2.0, np.nan]).hasnans</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([1, 2, 3]).hasnans</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> (ps.Series([1.0, 2.0, np.nan]) + 1).hasnans</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([1, 2, 3]).rename("a").to_frame().set_index("a").index.hasnans</span> |
| <span class="sd"> False</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">is_monotonic_increasing</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return boolean if values in the object are monotonically increasing.</span> |
| |
| <span class="sd"> .. note:: the current implementation of is_monotonic_increasing requires shuffling</span> |
| <span class="sd"> and aggregating multiple times to check the order locally and globally,</span> |
| <span class="sd"> which is potentially expensive. In the case of a multi-index, all data is</span> |
| <span class="sd"> transferred to a single node, which can easily cause out-of-memory errors.</span> |
| |
| <span class="sd"> .. note:: Disable the Spark config `spark.sql.optimizer.nestedSchemaPruning.enabled`</span> |
| <span class="sd"> for multi-index if you're using pandas-on-Spark &lt; 1.7.0 with PySpark 3.1.1.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> is_monotonic : bool</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ser = ps.Series(['1/1/2018', '3/1/2018', '4/1/2018'])</span> |
| <span class="sd"> >>> ser.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> df = ps.DataFrame({'dates': [None, '1/1/2018', '2/1/2018', '3/1/2018']})</span> |
| <span class="sd"> >>> df.dates.is_monotonic_increasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> df.index.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser = ps.Series([1])</span> |
| <span class="sd"> >>> ser.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser = ps.Series([])</span> |
| <span class="sd"> >>> ser.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser.rename("a").to_frame().set_index("a").index.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser = ps.Series([5, 4, 3, 2, 1], index=[1, 2, 3, 4, 5])</span> |
| <span class="sd"> >>> ser.is_monotonic_increasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ser.index.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> Support for MultiIndex</span> |
| |
| <span class="sd"> >>> midx = ps.MultiIndex.from_tuples(</span> |
| <span class="sd"> ... [('x', 'a'), ('x', 'b'), ('y', 'c'), ('y', 'd'), ('z', 'e')])</span> |
| <span class="sd"> >>> midx # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('x', 'a'),</span> |
| <span class="sd"> ('x', 'b'),</span> |
| <span class="sd"> ('y', 'c'),</span> |
| <span class="sd"> ('y', 'd'),</span> |
| <span class="sd"> ('z', 'e')],</span> |
| <span class="sd"> )</span> |
| <span class="sd"> >>> midx.is_monotonic_increasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> midx = ps.MultiIndex.from_tuples(</span> |
| <span class="sd"> ... [('z', 'a'), ('z', 'b'), ('y', 'c'), ('y', 'd'), ('x', 'e')])</span> |
| <span class="sd"> >>> midx # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('z', 'a'),</span> |
| <span class="sd"> ('z', 'b'),</span> |
| <span class="sd"> ('y', 'c'),</span> |
| <span class="sd"> ('y', 'd'),</span> |
| <span class="sd"> ('x', 'e')],</span> |
| <span class="sd"> )</span> |
| <span class="sd"> >>> midx.is_monotonic_increasing</span> |
| <span class="sd"> False</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_is_monotonic</span><span class="p">(</span><span class="s2">"increasing"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">is_monotonic_decreasing</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return boolean if values in the object are monotonically decreasing.</span> |
| |
| <span class="sd"> .. note:: the current implementation of is_monotonic_decreasing requires shuffling</span> |
| <span class="sd"> and aggregating multiple times to check the order locally and globally,</span> |
| <span class="sd"> which is potentially expensive. In case of multi-index, all data is transferred</span> |
| <span class="sd"> to a single node which can easily cause out-of-memory errors.</span> |
| |
| <span class="sd"> .. note:: Disable the Spark config `spark.sql.optimizer.nestedSchemaPruning.enabled`</span> |
| <span class="sd"> for multi-index if you're using pandas-on-Spark < 1.7.0 with PySpark 3.1.1.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> is_monotonic : bool</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ser = ps.Series(['4/1/2018', '3/1/2018', '1/1/2018'])</span> |
| <span class="sd"> >>> ser.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> df = ps.DataFrame({'dates': [None, '3/1/2018', '2/1/2018', '1/1/2018']})</span> |
| <span class="sd"> >>> df.dates.is_monotonic_decreasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> df.index.is_monotonic_decreasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ser = ps.Series([1])</span> |
| <span class="sd"> >>> ser.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser = ps.Series([])</span> |
| <span class="sd"> >>> ser.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser.rename("a").to_frame().set_index("a").index.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser = ps.Series([5, 4, 3, 2, 1], index=[1, 2, 3, 4, 5])</span> |
| <span class="sd"> >>> ser.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ser.index.is_monotonic_decreasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> Support for MultiIndex</span> |
| |
| <span class="sd"> >>> midx = ps.MultiIndex.from_tuples(</span> |
| <span class="sd"> ... [('x', 'a'), ('x', 'b'), ('y', 'c'), ('y', 'd'), ('z', 'e')])</span> |
| <span class="sd"> >>> midx # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('x', 'a'),</span> |
| <span class="sd"> ('x', 'b'),</span> |
| <span class="sd"> ('y', 'c'),</span> |
| <span class="sd"> ('y', 'd'),</span> |
| <span class="sd"> ('z', 'e')],</span> |
| <span class="sd"> )</span> |
| <span class="sd"> >>> midx.is_monotonic_decreasing</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> midx = ps.MultiIndex.from_tuples(</span> |
| <span class="sd"> ... [('z', 'e'), ('z', 'd'), ('y', 'c'), ('y', 'b'), ('x', 'a')])</span> |
| <span class="sd"> >>> midx # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('z', 'e'),</span> |
| <span class="sd"> ('z', 'd'),</span> |
| <span class="sd"> ('y', 'c'),</span> |
| <span class="sd"> ('y', 'b'),</span> |
| <span class="sd"> ('x', 'a')],</span> |
| <span class="sd"> )</span> |
| <span class="sd"> >>> midx.is_monotonic_decreasing</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_is_monotonic</span><span class="p">(</span><span class="s2">"decreasing"</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">_is_locally_monotonic_spark_column</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">order</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> |
| <span class="n">window</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_id"</span><span class="p">))</span> |
| <span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">order</span> <span class="o">==</span> <span class="s2">"increasing"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">)</span> <span class="o">>=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span> <span class="o">&</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span> |
| <span class="s2">"__origin"</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">)</span> <span class="o"><=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span> <span class="o">&</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span> |
| <span class="s2">"__origin"</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span> |
| |
| <span class="k">def</span> <span class="nf">_is_monotonic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">order</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="k">assert</span> <span class="n">order</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"increasing"</span><span class="p">,</span> <span class="s2">"decreasing"</span><span class="p">)</span> |
| |
| <span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span> |
| |
| <span class="n">sdf</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">spark_partition_id</span><span class="p">()</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span> |
| <span class="s2">"__partition_id"</span> |
| <span class="p">),</span> <span class="c1"># Make sure we use the same partition id in the whole job.</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">),</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">),</span> |
| <span class="p">)</span> |
| <span class="o">.</span><span class="n">select</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_id"</span><span class="p">),</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">),</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_is_locally_monotonic_spark_column</span><span class="p">(</span><span class="n">order</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span> |
| <span class="s2">"__comparison_within_partition"</span> |
| <span class="p">),</span> |
| <span class="p">)</span> |
| <span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_id"</span><span class="p">))</span> |
| <span class="o">.</span><span class="n">agg</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"__partition_min"</span><span class="p">),</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__origin"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"__partition_max"</span><span class="p">),</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__comparison_within_partition"</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">True</span><span class="p">)))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span> |
| <span class="s2">"__comparison_within_partition"</span> |
| <span class="p">),</span> |
| <span class="p">)</span> |
| <span class="p">)</span> |
| |
| <span class="c1"># Now we're windowing the aggregation results without partition specification.</span> |
| <span class="c1"># The number of rows here will be the same as partitions, which is expected</span> |
| <span class="c1"># to be small.</span> |
| <span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_id"</span><span class="p">))</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">order</span> <span class="o">==</span> <span class="s2">"increasing"</span><span class="p">:</span> |
| <span class="n">comparison_col</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_min"</span><span class="p">)</span> <span class="o">>=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_max"</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span> |
| <span class="n">window</span> |
| <span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">comparison_col</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_min"</span><span class="p">)</span> <span class="o"><=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__partition_max"</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span> |
| <span class="n">window</span> |
| <span class="p">)</span> |
| |
| <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> |
| <span class="n">comparison_col</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"__comparison_between_partitions"</span><span class="p">),</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__comparison_within_partition"</span><span class="p">),</span> |
| <span class="p">)</span> |
| |
| <span class="n">ret</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__comparison_between_partitions"</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">True</span><span class="p">)))</span> |
| <span class="o">&</span> <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"__comparison_within_partition"</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">True</span><span class="p">)))</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> |
| <span class="k">if</span> <span class="n">ret</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">ret</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">ndim</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return an int representing the number of array dimensions.</span> |
| |
| <span class="sd"> Return 1 for Series / Index / MultiIndex.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| |
| <span class="sd"> For Series</span> |
| |
| <span class="sd"> >>> s = ps.Series([None, 1, 2, 3, 4], index=[4, 5, 2, 1, 8])</span> |
| <span class="sd"> >>> s.ndim</span> |
| <span class="sd"> 1</span> |
| |
| <span class="sd"> For Index</span> |
| |
| <span class="sd"> >>> s.index.ndim</span> |
| <span class="sd"> 1</span> |
| |
| <span class="sd"> For MultiIndex</span> |
| |
| <span class="sd"> >>> midx = pd.MultiIndex([['lama', 'cow', 'falcon'],</span> |
| <span class="sd"> ... ['speed', 'weight', 'length']],</span> |
| <span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span> |
| <span class="sd"> ... [1, 1, 1, 1, 1, 2, 1, 2, 2]])</span> |
| <span class="sd"> >>> s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)</span> |
| <span class="sd"> >>> s.index.ndim</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="mi">1</span> |
| |
| <span class="k">def</span> <span class="nf">astype</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">type</span><span class="p">,</span> <span class="n">Dtype</span><span class="p">])</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Cast a pandas-on-Spark object to a specified dtype ``dtype``.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dtype : data type</span> |
| <span class="sd"> Use a numpy.dtype or Python type to cast entire pandas object to</span> |
| <span class="sd"> the same type.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> casted : same type as caller</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> to_datetime : Convert argument to datetime.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ser = ps.Series([1, 2], dtype='int32')</span> |
| <span class="sd"> >>> ser</span> |
| <span class="sd"> 0 1</span> |
| <span class="sd"> 1 2</span> |
| <span class="sd"> dtype: int32</span> |
| |
| <span class="sd"> >>> ser.astype('int64')</span> |
| <span class="sd"> 0 1</span> |
| <span class="sd"> 1 2</span> |
| <span class="sd"> dtype: int64</span> |
| |
| <span class="sd"> >>> ser.rename("a").to_frame().set_index("a").index.astype('int64')</span> |
| <span class="sd"> Index([1, 2], dtype='int64', name='a')</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dtype</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">isin</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">values</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Check whether `values` are contained in Series or Index.</span> |
| |
| <span class="sd"> Return a boolean Series or Index showing whether each element in the Series</span> |
| <span class="sd"> matches an element in the passed sequence of `values` exactly.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> values : set or list-like</span> |
| <span class="sd"> The sequence of values to test.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> isin : Series (bool dtype) or Index (bool dtype)</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> s = ps.Series(['lama', 'cow', 'lama', 'beetle', 'lama',</span> |
| <span class="sd"> ... 'hippo'], name='animal')</span> |
| <span class="sd"> >>> s.isin(['cow', 'lama'])</span> |
| <span class="sd"> 0 True</span> |
| <span class="sd"> 1 True</span> |
| <span class="sd"> 2 True</span> |
| <span class="sd"> 3 False</span> |
| <span class="sd"> 4 True</span> |
| <span class="sd"> 5 False</span> |
| <span class="sd"> Name: animal, dtype: bool</span> |
| |
| <span class="sd"> Passing a single string as ``s.isin('lama')`` will raise an error. Use</span> |
| <span class="sd"> a list of one element instead:</span> |
| |
| <span class="sd"> >>> s.isin(['lama'])</span> |
| <span class="sd"> 0 True</span> |
| <span class="sd"> 1 False</span> |
| <span class="sd"> 2 True</span> |
| <span class="sd"> 3 False</span> |
| <span class="sd"> 4 True</span> |
| <span class="sd"> 5 False</span> |
| <span class="sd"> Name: animal, dtype: bool</span> |
| |
| <span class="sd"> >>> s.rename("a").to_frame().set_index("a").index.isin(['lama']) # doctest: +SKIP</span> |
| <span class="sd"> Index([True, False, True, False, True, False], dtype='bool', name='a')</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">values</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> |
| <span class="s2">"only list-like objects are allowed to be passed"</span> |
| <span class="s2">" to isin(), you passed a [</span><span class="si">{values_type}</span><span class="s2">]"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">values_type</span><span class="o">=</span><span class="nb">type</span><span class="p">(</span><span class="n">values</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> |
| <span class="p">)</span> |
| |
| <span class="n">values</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">cast</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">values</span><span class="p">)</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">)</span> <span class="k">else</span> <span class="nb">list</span><span class="p">(</span><span class="n">values</span><span class="p">)</span> |
| <span class="p">)</span> |
| |
| <span class="n">other</span> <span class="o">=</span> <span class="p">[</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">v</span><span class="p">)</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">values</span><span class="p">]</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> |
| <span class="n">field</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span> |
| <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">dtype</span><span class="p">(</span><span class="s2">"bool"</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">=</span><span class="n">BooleanType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span> |
| <span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="o">=</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)),</span> <span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">isnull</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Detect missing values.</span> |
| |
| <span class="sd"> Return a boolean same-sized object indicating if the values are NA.</span> |
| <span class="sd"> NA values, such as None or numpy.NaN, get mapped to True values.</span> |
| <span class="sd"> Everything else gets mapped to False values. Characters such as empty strings '' or</span> |
| <span class="sd"> numpy.inf are not considered NA values.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> Series or Index : Mask of bool values for each element in Series</span> |
| <span class="sd"> that indicates whether an element is an NA value.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ser = ps.Series([5, 6, np.nan])</span> |
| <span class="sd"> >>> ser.isna() # doctest: +NORMALIZE_WHITESPACE</span> |
| <span class="sd"> 0 False</span> |
| <span class="sd"> 1 False</span> |
| <span class="sd"> 2 True</span> |
| <span class="sd"> dtype: bool</span> |
| |
| <span class="sd"> >>> ser.rename("a").to_frame().set_index("a").index.isna() # doctest: +SKIP</span> |
| <span class="sd"> Index([False, False, True], dtype='bool', name='a')</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">MultiIndex</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">MultiIndex</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"isna is not defined for MultiIndex"</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> |
| |
| <span class="n">isna</span> <span class="o">=</span> <span class="n">isnull</span> |
| |
| <span class="k">def</span> <span class="nf">notnull</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Detect existing (non-missing) values.</span> |
| <span class="sd"> Return a boolean same-sized object indicating if the values are not NA.</span> |
| <span class="sd"> Non-missing values get mapped to True.</span> |
| <span class="sd"> Characters such as empty strings '' or numpy.inf are not considered NA values.</span> |
| <span class="sd"> NA values, such as None or numpy.NaN, get mapped to False values.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> Series or Index : Mask of bool values for each element in Series</span> |
| <span class="sd"> that indicates whether an element is not an NA value.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Show which entries in a Series are not NA.</span> |
| |
| <span class="sd"> >>> ser = ps.Series([5, 6, np.nan])</span> |
| <span class="sd"> >>> ser</span> |
| <span class="sd"> 0 5.0</span> |
| <span class="sd"> 1 6.0</span> |
| <span class="sd"> 2 NaN</span> |
| <span class="sd"> dtype: float64</span> |
| |
| <span class="sd"> >>> ser.notna()</span> |
| <span class="sd"> 0 True</span> |
| <span class="sd"> 1 True</span> |
| <span class="sd"> 2 False</span> |
| <span class="sd"> dtype: bool</span> |
| |
| <span class="sd"> >>> ser.rename("a").to_frame().set_index("a").index.notna() # doctest: +SKIP</span> |
| <span class="sd"> Index([True, True, False], dtype='bool', name='a')</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">MultiIndex</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">MultiIndex</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"notna is not defined for MultiIndex"</span><span class="p">)</span> |
| <span class="k">return</span> <span class="p">(</span><span class="o">~</span><span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">())</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span> |
| |
| <span class="n">notna</span> <span class="o">=</span> <span class="n">notnull</span> |
| |
| <span class="c1"># TODO: axis and many arguments should be implemented.</span> |
| <span class="k">def</span> <span class="nf">all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return whether all elements are True.</span> |
| |
| <span class="sd"> Returns True unless there is at least one element within a series that is</span> |
| <span class="sd"> False or equivalent (e.g. zero or empty).</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> axis : {0 or 'index'}, default 0</span> |
| <span class="sd"> Indicate which axis or axes should be reduced.</span> |
| |
| <span class="sd"> * 0 / 'index' : reduce the index, return a Series whose index is the</span> |
| <span class="sd"> original column labels.</span> |
| |
| <span class="sd"> skipna : boolean, default True</span> |
| <span class="sd"> Exclude NA values, such as None or numpy.nan.</span> |
| <span class="sd"> If an entire row/column is NA values and `skipna` is True,</span> |
| <span class="sd"> then the result will be True, as for an empty row/column.</span> |
| <span class="sd"> If `skipna` is False, numpy.nan values are treated as True because they are</span> |
| <span class="sd"> not equal to zero, whereas None values are treated as False.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ps.Series([True, True]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([True, False]).all()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([0, 1]).all()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([1, 2, 3]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([True, True, None]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([True, True, None]).all(skipna=False)</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([True, False, None]).all()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([np.nan]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([np.nan]).all(skipna=False)</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([None]).all()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([None]).all(skipna=False)</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> df = ps.Series([True, False, None]).rename("a").to_frame()</span> |
| <span class="sd"> >>> df.set_index("a").index.all()</span> |
| <span class="sd"> False</span> |
| <span class="sd"> """</span> |
| <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'axis should be either 0 or "index" currently.'</span><span class="p">)</span> |
| |
| <span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> |
| <span class="n">col</span> <span class="o">=</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">sdf</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> |
| |
| <span class="c1"># `any` and `every` were added as of Spark 3.0.</span> |
| <span class="c1"># ret = sdf.select(F.expr("every(CAST(`%s` AS BOOLEAN))" % sdf.columns[0])).collect()[0][0]</span> |
| <span class="c1"># We use min as its alternative as below.</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">)</span> <span class="ow">or</span> <span class="n">skipna</span><span class="p">:</span> |
| <span class="c1"># np.nan has no effect on the result; None has no effect if `skipna`</span> |
| <span class="n">ret</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">"boolean"</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">True</span><span class="p">))))</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="c1"># Take None as False when not `skipna`</span> |
| <span class="n">ret</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">"boolean"</span><span class="p">)))</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> |
| |
| <span class="k">if</span> <span class="n">ret</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">ret</span> |
| |
| <span class="c1"># TODO: axis, skipna, and many arguments should be implemented.</span> |
| <span class="k">def</span> <span class="nf">any</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return whether any element is True.</span> |
| |
| <span class="sd"> Returns False unless there is at least one element within a series that is</span> |
| <span class="sd"> True or equivalent (e.g. non-zero or non-empty).</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> axis : {0 or 'index'}, default 0</span> |
| <span class="sd"> Indicate which axis or axes should be reduced.</span> |
| |
| <span class="sd"> * 0 / 'index' : reduce the index, return a Series whose index is the</span> |
| <span class="sd"> original column labels.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ps.Series([False, False]).any()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([True, False]).any()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([0, 0]).any()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([0, 1, 2]).any()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([False, False, None]).any()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([True, False, None]).any()</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> >>> ps.Series([]).any()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> ps.Series([np.nan]).any()</span> |
| <span class="sd"> False</span> |
| |
| <span class="sd"> >>> df = ps.Series([True, False, None]).rename("a").to_frame()</span> |
| <span class="sd"> >>> df.set_index("a").index.any()</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'axis should be either 0 or "index" currently.'</span><span class="p">)</span> |
| |
| <span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> |
| <span class="n">col</span> <span class="o">=</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">sdf</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> |
| |
| <span class="c1"># Note that we're ignoring `None`s here for now.</span> |
| <span class="c1"># `any` and `every` were added as of Spark 3.0</span> |
| <span class="c1"># ret = sdf.select(F.expr("any(CAST(`%s` AS BOOLEAN))" % sdf.columns[0])).collect()[0][0]</span> |
| <span class="c1"># Here we use max as its alternative:</span> |
| <span class="n">ret</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">col</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">"boolean"</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">))))</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> |
| <span class="k">if</span> <span class="n">ret</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">ret</span> |
| |
| <span class="c1"># TODO: add freq and axis parameters</span> |
| <span class="k">def</span> <span class="nf">shift</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Shift Series/Index by desired number of periods.</span> |
| |
| <span class="sd"> .. note:: the current implementation of shift uses Spark's Window without</span> |
| <span class="sd"> specifying partition specification. This leads to moving all data into</span> |
| <span class="sd"> a single partition in a single machine and could cause serious</span> |
| <span class="sd"> performance degradation. Avoid this method with very large datasets.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> periods : int</span> |
| <span class="sd"> Number of periods to shift. Can be positive or negative.</span> |
| <span class="sd"> fill_value : object, optional</span> |
| <span class="sd"> The scalar value to use for newly introduced missing values.</span> |
| <span class="sd"> The default depends on the dtype of self. For numeric data, np.nan is used.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> Copy of input Series/Index, shifted.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> df = ps.DataFrame({'Col1': [10, 20, 15, 30, 45],</span> |
| <span class="sd"> ... 'Col2': [13, 23, 18, 33, 48],</span> |
| <span class="sd"> ... 'Col3': [17, 27, 22, 37, 52]},</span> |
| <span class="sd"> ... columns=['Col1', 'Col2', 'Col3'])</span> |
| |
| <span class="sd"> >>> df.Col1.shift(periods=3)</span> |
| <span class="sd"> 0 NaN</span> |
| <span class="sd"> 1 NaN</span> |
| <span class="sd"> 2 NaN</span> |
| <span class="sd"> 3 10.0</span> |
| <span class="sd"> 4 20.0</span> |
| <span class="sd"> Name: Col1, dtype: float64</span> |
| |
| <span class="sd"> >>> df.Col2.shift(periods=3, fill_value=0)</span> |
| <span class="sd"> 0 0</span> |
| <span class="sd"> 1 0</span> |
| <span class="sd"> 2 0</span> |
| <span class="sd"> 3 13</span> |
| <span class="sd"> 4 23</span> |
| <span class="sd"> Name: Col2, dtype: int64</span> |
| |
| <span class="sd"> >>> df.index.shift(periods=3, fill_value=0)</span> |
| <span class="sd"> Index([0, 0, 0, 0, 1], dtype='int64')</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_shift</span><span class="p">(</span><span class="n">periods</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span> |
| |
| <span class="k">def</span> <span class="nf">_shift</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> |
| <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> |
| <span class="n">fill_value</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">"ColumnOrName"</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">periods</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"periods should be an int; however, got [</span><span class="si">%s</span><span class="s2">]"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">periods</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> |
| |
| <span class="n">col</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> |
| <span class="n">window</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="n">periods</span><span class="p">,</span> <span class="o">-</span><span class="n">periods</span><span class="p">)</span> |
| <span class="p">)</span> |
| <span class="n">lag_col</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span> |
| <span class="n">col</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">lag_col</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span> <span class="o">|</span> <span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="n">lag_col</span><span class="p">),</span> <span class="n">fill_value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">lag_col</span><span class="p">)</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> |
| |
| <span class="c1"># TODO: Update documentation for the bins parameter when it's supported</span> |
| <span class="k">def</span> <span class="nf">value_counts</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="n">normalize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">sort</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">bins</span><span class="p">:</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"Series"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return a Series containing counts of unique values.</span> |
| <span class="sd"> The resulting object will be in descending order so that the</span> |
| <span class="sd"> first element is the most frequently-occurring element.</span> |
| <span class="sd"> Excludes NA values by default.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> normalize : boolean, default False</span> |
| <span class="sd"> If True then the object returned will contain the relative</span> |
| <span class="sd"> frequencies of the unique values.</span> |
| <span class="sd"> sort : boolean, default True</span> |
| <span class="sd"> Sort by values.</span> |
| <span class="sd"> ascending : boolean, default False</span> |
| <span class="sd"> Sort in ascending order.</span> |
| <span class="sd"> bins : Not Yet Supported</span> |
| <span class="sd"> dropna : boolean, default True</span> |
| <span class="sd"> Don't include counts of NaN.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> counts : Series</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> Series.count: Number of non-NA elements in a Series.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> For Series</span> |
| |
| <span class="sd"> >>> df = ps.DataFrame({'x':[0, 0, 1, 1, 1, np.nan]})</span> |
| <span class="sd"> >>> df.x.value_counts() # doctest: +NORMALIZE_WHITESPACE</span> |
| <span class="sd"> x</span> |
| <span class="sd"> 1.0 3</span> |
| <span class="sd"> 0.0 2</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> With `normalize` set to `True`, returns the relative frequency by</span> |
| <span class="sd"> dividing all values by the sum of values.</span> |
| |
| <span class="sd"> >>> df.x.value_counts(normalize=True) # doctest: +NORMALIZE_WHITESPACE</span> |
| <span class="sd"> x</span> |
| <span class="sd"> 1.0 0.6</span> |
| <span class="sd"> 0.0 0.4</span> |
| <span class="sd"> Name: proportion, dtype: float64</span> |
| |
| <span class="sd"> **dropna**</span> |
| <span class="sd"> With `dropna` set to `False` we can also see NaN index values.</span> |
| |
| <span class="sd"> >>> df.x.value_counts(dropna=False) # doctest: +NORMALIZE_WHITESPACE</span> |
| <span class="sd"> x</span> |
| <span class="sd"> 1.0 3</span> |
| <span class="sd"> 0.0 2</span> |
| <span class="sd"> NaN 1</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> For Index</span> |
| |
| <span class="sd"> >>> idx = ps.Index([3, 1, 2, 3, 4, np.nan])</span> |
| <span class="sd"> >>> idx</span> |
| <span class="sd"> Index([3.0, 1.0, 2.0, 3.0, 4.0, nan], dtype='float64')</span> |
| |
| <span class="sd"> >>> idx.value_counts().sort_index()</span> |
| <span class="sd"> 1.0 1</span> |
| <span class="sd"> 2.0 1</span> |
| <span class="sd"> 3.0 2</span> |
| <span class="sd"> 4.0 1</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> **sort**</span> |
| |
| <span class="sd"> With `sort` set to `False`, the result is not sorted by count.</span> |
| |
| <span class="sd"> >>> idx.value_counts(sort=False).sort_index()</span> |
| <span class="sd"> 1.0 1</span> |
| <span class="sd"> 2.0 1</span> |
| <span class="sd"> 3.0 2</span> |
| <span class="sd"> 4.0 1</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> **normalize**</span> |
| |
| <span class="sd"> With `normalize` set to `True`, returns the relative frequency by</span> |
| <span class="sd"> dividing all values by the sum of values.</span> |
| |
| <span class="sd"> >>> idx.value_counts(normalize=True).sort_index()</span> |
| <span class="sd"> 1.0 0.2</span> |
| <span class="sd"> 2.0 0.2</span> |
| <span class="sd"> 3.0 0.4</span> |
| <span class="sd"> 4.0 0.2</span> |
| <span class="sd"> Name: proportion, dtype: float64</span> |
| |
| <span class="sd"> **dropna**</span> |
| |
| <span class="sd"> With `dropna` set to `False` we can also see NaN index values.</span> |
| |
| <span class="sd"> >>> idx.value_counts(dropna=False).sort_index() # doctest: +SKIP</span> |
| <span class="sd"> 1.0 1</span> |
| <span class="sd"> 2.0 1</span> |
| <span class="sd"> 3.0 2</span> |
| <span class="sd"> 4.0 1</span> |
| <span class="sd"> NaN 1</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> For MultiIndex.</span> |
| |
| <span class="sd"> >>> midx = pd.MultiIndex([['lama', 'cow', 'falcon'],</span> |
| <span class="sd"> ... ['speed', 'weight', 'length']],</span> |
| <span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span> |
| <span class="sd"> ... [1, 1, 1, 1, 1, 2, 1, 2, 2]])</span> |
| <span class="sd"> >>> s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)</span> |
| <span class="sd"> >>> s.index # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([( 'lama', 'weight'),</span> |
| <span class="sd"> ( 'lama', 'weight'),</span> |
| <span class="sd"> ( 'lama', 'weight'),</span> |
| <span class="sd"> ( 'cow', 'weight'),</span> |
| <span class="sd"> ( 'cow', 'weight'),</span> |
| <span class="sd"> ( 'cow', 'length'),</span> |
| <span class="sd"> ('falcon', 'weight'),</span> |
| <span class="sd"> ('falcon', 'length'),</span> |
| <span class="sd"> ('falcon', 'length')],</span> |
| <span class="sd"> )</span> |
| |
| <span class="sd"> >>> s.index.value_counts().sort_index()</span> |
| <span class="sd"> (cow, length) 1</span> |
| <span class="sd"> (cow, weight) 2</span> |
| <span class="sd"> (falcon, length) 2</span> |
| <span class="sd"> (falcon, weight) 1</span> |
| <span class="sd"> (lama, weight) 3</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| |
| <span class="sd"> >>> s.index.value_counts(normalize=True).sort_index()</span> |
| <span class="sd"> (cow, length) 0.111111</span> |
| <span class="sd"> (cow, weight) 0.222222</span> |
| <span class="sd"> (falcon, length) 0.222222</span> |
| <span class="sd"> (falcon, weight) 0.111111</span> |
| <span class="sd"> (lama, weight) 0.333333</span> |
| <span class="sd"> Name: proportion, dtype: float64</span> |
| |
| <span class="sd"> If the Index has a name, the name is preserved in the result.</span> |
| |
| <span class="sd"> >>> idx = ps.Index([0, 0, 0, 1, 1, 2, 3], name='pandas-on-Spark')</span> |
| <span class="sd"> >>> idx.value_counts().sort_index()</span> |
| <span class="sd"> pandas-on-Spark</span> |
| <span class="sd"> 0 3</span> |
| <span class="sd"> 1 2</span> |
| <span class="sd"> 2 1</span> |
| <span class="sd"> 3 1</span> |
| <span class="sd"> Name: count, dtype: int64</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">first_series</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes.multi</span> <span class="kn">import</span> <span class="n">MultiIndex</span> |
| |
| <span class="k">if</span> <span class="n">bins</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"value_counts currently does not support bins"</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">dropna</span><span class="p">:</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">MultiIndex</span><span class="p">):</span> |
| <span class="c1"># If even one StructField is null, that row should be dropped.</span> |
| <span class="n">index_spark_column_names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span> |
| <span class="n">spark_column</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> |
| <span class="n">cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">index_spark_column_name</span> <span class="ow">in</span> <span class="n">index_spark_column_names</span><span class="p">:</span> |
| <span class="n">cond</span> <span class="o">=</span> <span class="n">cond</span> <span class="o">|</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">getItem</span><span class="p">(</span><span class="n">index_spark_column_name</span><span class="p">)</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span> |
| <span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">spark_column</span><span class="p">)</span> |
| <span class="n">sdf_dropna</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="o">~</span><span class="n">cond</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">sdf_dropna</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">sdf_dropna</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> |
| <span class="n">index_name</span> <span class="o">=</span> <span class="n">SPARK_DEFAULT_INDEX_NAME</span> |
| <span class="n">column_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf_dropna</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf_dropna</span><span class="p">,</span> <span class="n">column_name</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_name</span><span class="p">))</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> |
| <span class="k">if</span> <span class="n">sort</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ascending</span><span class="p">:</span> |
| <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"count"</span><span class="p">))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"count"</span><span class="p">)</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span> |
| |
| <span class="k">if</span> <span class="n">normalize</span><span class="p">:</span> |
| <span class="n">result_column_name</span> <span class="o">=</span> <span class="s2">"proportion"</span> |
| <span class="n">drop_sum</span> <span class="o">=</span> <span class="n">sdf_dropna</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> |
| <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"count"</span><span class="p">)</span> <span class="o">/</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">drop_sum</span><span class="p">))</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">result_column_name</span> <span class="o">=</span> <span class="s2">"count"</span> |
| |
| <span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span> |
| <span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span> |
| <span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_name</span><span class="p">)],</span> |
| <span class="n">index_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">,</span> |
| <span class="n">column_labels</span><span class="o">=</span><span class="p">[(</span><span class="n">result_column_name</span><span class="p">,)],</span> |
| <span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">"count"</span><span class="p">)],</span> |
| <span class="n">column_label_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_label_names</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span> |
| |
| <span class="k">def</span> <span class="nf">nunique</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">approx</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">rsd</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return number of unique elements in the object.</span> |
| <span class="sd"> Excludes NA values by default.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dropna : bool, default True</span> |
| <span class="sd"> Don’t include NaN in the count.</span> |
| <span class="sd"> approx: bool, default False</span> |
| <span class="sd"> If False, will use the exact algorithm and return the exact number of unique values.</span> |
| <span class="sd"> If True, it uses the HyperLogLog approximate algorithm, which is significantly faster</span> |
| <span class="sd"> for large amounts of data.</span> |
| <span class="sd"> Note: This parameter is specific to pandas-on-Spark and is not found in pandas.</span> |
| <span class="sd"> rsd: float, default 0.05</span> |
| <span class="sd"> Maximum estimation error allowed in the HyperLogLog algorithm.</span> |
| <span class="sd"> Note: Just like ``approx`` this parameter is specific to pandas-on-Spark.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> int</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> DataFrame.nunique: Method nunique for DataFrame.</span> |
| <span class="sd"> Series.count: Count non-NA/null observations in the Series.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> ps.Series([1, 2, 3, np.nan]).nunique()</span> |
| <span class="sd"> 3</span> |
| |
| <span class="sd"> >>> ps.Series([1, 2, 3, np.nan]).nunique(dropna=False)</span> |
| <span class="sd"> 4</span> |
| |
| <span class="sd"> On big data, we recommend using the approximate algorithm to speed up this function.</span> |
| <span class="sd"> The result will be very close to the exact unique count.</span> |
| |
| <span class="sd"> >>> ps.Series([1, 2, 3, np.nan]).nunique(approx=True)</span> |
| <span class="sd"> 3</span> |
| |
| <span class="sd"> >>> idx = ps.Index([1, 1, 2, None])</span> |
| <span class="sd"> >>> idx</span> |
| <span class="sd"> Index([1.0, 1.0, 2.0, nan], dtype='float64')</span> |
| |
| <span class="sd"> >>> idx.nunique()</span> |
| <span class="sd"> 2</span> |
| |
| <span class="sd"> >>> idx.nunique(dropna=False)</span> |
| <span class="sd"> 3</span> |
| <span class="sd"> """</span> |
| <span class="n">res</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">_nunique</span><span class="p">(</span><span class="n">dropna</span><span class="p">,</span> <span class="n">approx</span><span class="p">,</span> <span class="n">rsd</span><span class="p">)])</span> |
| <span class="k">return</span> <span class="n">res</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> |
| |
| <span class="k">def</span> <span class="nf">_nunique</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">approx</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">rsd</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> |
| <span class="n">colname</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="n">count_fn</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span> |
| <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">],</span> |
| <span class="n">partial</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">approx_count_distinct</span><span class="p">,</span> <span class="n">rsd</span><span class="o">=</span><span class="n">rsd</span><span class="p">)</span> <span class="k">if</span> <span class="n">approx</span> <span class="k">else</span> <span class="n">F</span><span class="o">.</span><span class="n">countDistinct</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="k">if</span> <span class="n">dropna</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">count_fn</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colname</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="p">(</span> |
| <span class="n">count_fn</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> |
| <span class="o">+</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span> |
| <span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">colname</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">take</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">indices</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">])</span> <span class="o">-></span> <span class="n">IndexOpsLike</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Return the elements in the given *positional* indices along an axis.</span> |
| |
| <span class="sd"> This means that we are not indexing according to actual values in</span> |
| <span class="sd"> the index attribute of the object. We are indexing according to the</span> |
| <span class="sd"> actual position of the element in the object.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> indices : array-like</span> |
| <span class="sd"> An array of ints indicating which positions to take.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> taken : same type as caller</span> |
| <span class="sd"> An array-like containing the elements taken from the object.</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> DataFrame.loc : Select a subset of a DataFrame by labels.</span> |
| <span class="sd"> DataFrame.iloc : Select a subset of a DataFrame by positions.</span> |
| <span class="sd"> numpy.take : Take elements from an array along an axis.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| |
| <span class="sd"> Series</span> |
| |
| <span class="sd"> >>> psser = ps.Series([100, 200, 300, 400, 500])</span> |
| <span class="sd"> >>> psser</span> |
| <span class="sd"> 0 100</span> |
| <span class="sd"> 1 200</span> |
| <span class="sd"> 2 300</span> |
| <span class="sd"> 3 400</span> |
| <span class="sd"> 4 500</span> |
| <span class="sd"> dtype: int64</span> |
| |
| <span class="sd"> >>> psser.take([0, 2, 4]).sort_index()</span> |
| <span class="sd"> 0 100</span> |
| <span class="sd"> 2 300</span> |
| <span class="sd"> 4 500</span> |
| <span class="sd"> dtype: int64</span> |
| |
| <span class="sd"> Index</span> |
| |
| <span class="sd"> >>> psidx = ps.Index([100, 200, 300, 400, 500])</span> |
| <span class="sd"> >>> psidx</span> |
| <span class="sd"> Index([100, 200, 300, 400, 500], dtype='int64')</span> |
| |
| <span class="sd"> >>> psidx.take([0, 2, 4]).sort_values()</span> |
| <span class="sd"> Index([100, 300, 500], dtype='int64')</span> |
| |
| <span class="sd"> MultiIndex</span> |
| |
| <span class="sd"> >>> psmidx = ps.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("x", "c")])</span> |
| <span class="sd"> >>> psmidx # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('x', 'a'),</span> |
| <span class="sd"> ('x', 'b'),</span> |
| <span class="sd"> ('x', 'c')],</span> |
| <span class="sd"> )</span> |
| |
| <span class="sd"> >>> psmidx.take([0, 2]) # doctest: +SKIP</span> |
| <span class="sd"> MultiIndex([('x', 'a'),</span> |
| <span class="sd"> ('x', 'c')],</span> |
| <span class="sd"> )</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">indices</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="p">(</span><span class="nb">dict</span><span class="p">,</span> <span class="nb">set</span><span class="p">)):</span> |
| <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"`indices` must be a list-like except dict or set"</span><span class="p">)</span> |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> |
| <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">IndexOpsLike</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">indices</span><span class="p">])</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">IndexOpsLike</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">indices</span><span class="p">]</span><span class="o">.</span><span class="n">index</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">factorize</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">:</span> <span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">sort</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">use_na_sentinel</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="n">IndexOpsLike</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Encode the object as an enumerated type or categorical variable.</span> |
| |
| <span class="sd"> This method is useful for obtaining a numeric representation of an</span> |
| <span class="sd"> array when all that matters is identifying distinct values.</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> sort : bool, default True</span> |
| <span class="sd"> use_na_sentinel : bool, default True</span> |
| <span class="sd"> If True, the sentinel -1 will be used for NaN values, effectively assigning them</span> |
| <span class="sd"> a distinct category. If False, NaN values will be encoded as non-negative integers,</span> |
| <span class="sd"> treating them as unique categories in the encoding process and retaining them in the</span> |
| <span class="sd"> set of unique categories in the data.</span> |
| |
| <span class="sd"> Returns</span> |
| <span class="sd"> -------</span> |
| <span class="sd"> codes : Series or Index</span> |
| <span class="sd"> A Series or Index that's an indexer into `uniques`.</span> |
| <span class="sd"> ``uniques.take(codes)`` will have the same values as `values`.</span> |
| <span class="sd"> uniques : pd.Index</span> |
| <span class="sd"> The unique valid values.</span> |
| |
| <span class="sd"> .. note ::</span> |
| |
| <span class="sd"> Even if there's a missing value in `values`, `uniques` will</span> |
| <span class="sd"> *not* contain an entry for it unless ``use_na_sentinel=False``.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> psser = ps.Series(['b', None, 'a', 'c', 'b'])</span> |
| <span class="sd"> >>> codes, uniques = psser.factorize()</span> |
| <span class="sd"> >>> codes</span> |
| <span class="sd"> 0 1</span> |
| <span class="sd"> 1 -1</span> |
| <span class="sd"> 2 0</span> |
| <span class="sd"> 3 2</span> |
| <span class="sd"> 4 1</span> |
| <span class="sd"> dtype: int32</span> |
| <span class="sd"> >>> uniques</span> |
| <span class="sd"> Index(['a', 'b', 'c'], dtype='object')</span> |
| |
| <span class="sd"> >>> codes, uniques = psser.factorize(use_na_sentinel=False)</span> |
| <span class="sd"> >>> codes</span> |
| <span class="sd"> 0 1</span> |
| <span class="sd"> 1 3</span> |
| <span class="sd"> 2 0</span> |
| <span class="sd"> 3 2</span> |
| <span class="sd"> 4 1</span> |
| <span class="sd"> dtype: int32</span> |
| <span class="sd"> >>> uniques</span> |
| <span class="sd"> Index(['a', 'b', 'c', None], dtype='object')</span> |
| |
| <span class="sd"> For Index:</span> |
| |
| <span class="sd"> >>> psidx = ps.Index(['b', None, 'a', 'c', 'b'])</span> |
| <span class="sd"> >>> codes, uniques = psidx.factorize()</span> |
| <span class="sd"> >>> codes</span> |
| <span class="sd"> Index([1, -1, 0, 2, 1], dtype='int32')</span> |
| <span class="sd"> >>> uniques</span> |
| <span class="sd"> Index(['a', 'b', 'c'], dtype='object')</span> |
| <span class="sd"> """</span> |
| <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">first_series</span> |
| |
| <span class="k">assert</span> <span class="n">sort</span> <span class="ow">is</span> <span class="kc">True</span> |
| <span class="n">use_na_sentinel</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="k">if</span> <span class="n">use_na_sentinel</span> <span class="k">else</span> <span class="kc">False</span> <span class="c1"># type: ignore[assignment]</span> |
| |
| <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> |
| <span class="s2">"Argument `na_sentinel` will be removed in 4.0.0."</span><span class="p">,</span> |
| <span class="ne">FutureWarning</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">CategoricalDtype</span><span class="p">):</span> |
| <span class="n">categories</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">categories</span> |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">categories</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span> |
| <span class="k">for</span> <span class="n">code</span><span class="p">,</span> <span class="n">category</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">enumerate</span><span class="p">(</span><span class="n">categories</span><span class="p">))):</span> |
| <span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">code</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">category</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span> |
| |
| <span class="n">codes</span><span class="p">,</span> <span class="n">uniques</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span> |
| <span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">factorize</span><span class="p">(</span><span class="n">use_na_sentinel</span><span class="o">=</span><span class="n">use_na_sentinel</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">codes</span><span class="p">,</span> <span class="n">uniques</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> |
| |
| <span class="n">uniq_sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span> |
| |
| <span class="c1"># Checks the number of uniques and constructs the sorted `uniques_list`</span> |
| <span class="n">max_compute_count</span> <span class="o">=</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">"compute.max_rows"</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">max_compute_count</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="n">uniq_pdf</span> <span class="o">=</span> <span class="n">uniq_sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">max_compute_count</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span> |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">uniq_pdf</span><span class="p">)</span> <span class="o">></span> <span class="n">max_compute_count</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s2">"Current Series has more then </span><span class="si">{0}</span><span class="s2"> unique values. "</span> |
| <span class="s2">"Please set 'compute.max_rows' by using 'pyspark.pandas.config.set_option' "</span> |
| <span class="s2">"to more than </span><span class="si">{0}</span><span class="s2"> rows. Note that, before changing the "</span> |
| <span class="s2">"'compute.max_rows', this operation is considerably expensive."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> |
| <span class="n">max_compute_count</span> |
| <span class="p">)</span> |
| <span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">uniq_pdf</span> <span class="o">=</span> <span class="n">uniq_sdf</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span> |
| <span class="c1"># pandas converts both NaN and null from Spark to np.nan, so de-duplication is required</span> |
| <span class="n">uniq_series</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">uniq_pdf</span><span class="p">)</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">()</span> |
| <span class="n">uniques_list</span> <span class="o">=</span> <span class="n">uniq_series</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> |
| <span class="n">uniques_list</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">uniques_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">x</span><span class="p">))</span> |
| |
| <span class="c1"># Constructs `unique_to_code`, mapping each non-NA unique value to its code</span> |
| <span class="n">unique_to_code</span> <span class="o">=</span> <span class="p">{}</span> |
| <span class="k">if</span> <span class="n">use_na_sentinel</span><span class="p">:</span> |
| <span class="n">na_sentinel_code</span> <span class="o">=</span> <span class="n">use_na_sentinel</span> |
| <span class="n">code</span> <span class="o">=</span> <span class="mi">0</span> |
| <span class="k">for</span> <span class="n">unique</span> <span class="ow">in</span> <span class="n">uniques_list</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">unique</span><span class="p">):</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">use_na_sentinel</span><span class="p">:</span> |
| <span class="n">na_sentinel_code</span> <span class="o">=</span> <span class="n">code</span> <span class="c1"># type: ignore[assignment]</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">unique_to_code</span><span class="p">[</span><span class="n">unique</span><span class="p">]</span> <span class="o">=</span> <span class="n">code</span> |
| <span class="n">code</span> <span class="o">+=</span> <span class="mi">1</span> |
| |
| <span class="n">kvs</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span> |
| <span class="n">chain</span><span class="p">(</span><span class="o">*</span><span class="p">([(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">unique</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">code</span><span class="p">))</span> <span class="k">for</span> <span class="n">unique</span><span class="p">,</span> <span class="n">code</span> <span class="ow">in</span> <span class="n">unique_to_code</span><span class="o">.</span><span class="n">items</span><span class="p">()]))</span> |
| <span class="p">)</span> |
| |
| <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">kvs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># uniques are all missing values</span> |
| <span class="n">new_scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">na_sentinel_code</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">map_scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">create_map</span><span class="p">(</span><span class="o">*</span><span class="n">kvs</span><span class="p">)</span> |
| <span class="n">null_scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">na_sentinel_code</span><span class="p">))</span> |
| <span class="n">new_scol</span> <span class="o">=</span> <span class="n">null_scol</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">map_scol</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">])</span> |
| |
| <span class="n">codes</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">new_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> |
| |
| <span class="k">if</span> <span class="n">use_na_sentinel</span><span class="p">:</span> |
| <span class="c1"># Drops the NaN from the uniques of the values</span> |
| <span class="n">uniques_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">uniques_list</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">x</span><span class="p">)]</span> |
| |
| <span class="n">uniques</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">(</span><span class="n">uniques_list</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">codes</span><span class="p">,</span> <span class="n">uniques</span> |
| |
| |
| <span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="kn">import</span> <span class="nn">os</span> |
| <span class="kn">import</span> <span class="nn">doctest</span> |
| <span class="kn">import</span> <span class="nn">sys</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span> |
| <span class="kn">import</span> <span class="nn">pyspark.pandas.base</span> |
| |
| <span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">"SPARK_HOME"</span><span class="p">])</span> |
| |
| <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">base</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"ps"</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span> |
| <span class="n">spark</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[4]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"pyspark.pandas.base tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> |
| <span class="p">)</span> |
| <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span> |
| <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">base</span><span class="p">,</span> |
| <span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> |
| <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> |
| <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> |
| <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| |
| |
| <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> |
| <span class="n">_test</span><span class="p">()</span> |
| </pre></div> |
| |
| </article> |
| |
| |
| |
| <footer class="bd-footer-article"> |
| |
| <div class="footer-article-items footer-article__inner"> |
| |
| <div class="footer-article-item"><!-- Previous / next buttons --> |
| <div class="prev-next-area"> |
| </div></div> |
| |
| </div> |
| |
| </footer> |
| |
| </div> |
| |
| |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> |
| <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> |
| |
| <footer class="bd-footer"> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"><p class="copyright"> |
| Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. |
| </p></div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"><p class="theme-version"> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |