Source code for pyspark.pandas.generic

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A base class of DataFrame/Column to behave like pandas DataFrame/Series.
"""
|  | <span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span> | 
|  | <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">reduce</span> | 
|  | <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">Any</span><span class="p">,</span> | 
|  | <span class="n">Callable</span><span class="p">,</span> | 
|  | <span class="n">Dict</span><span class="p">,</span> | 
|  | <span class="n">IO</span><span class="p">,</span> | 
|  | <span class="n">List</span><span class="p">,</span> | 
|  | <span class="n">Optional</span><span class="p">,</span> | 
|  | <span class="n">NoReturn</span><span class="p">,</span> | 
|  | <span class="n">Tuple</span><span class="p">,</span> | 
|  | <span class="n">Union</span><span class="p">,</span> | 
|  | <span class="n">TYPE_CHECKING</span><span class="p">,</span> | 
|  | <span class="n">cast</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">import</span> <span class="nn">warnings</span> | 
|  |  | 
|  | <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> | 
|  | <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> | 
|  | <span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="n">is_list_like</span>  <span class="c1"># type: ignore[attr-defined]</span> | 
|  |  | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">Column</span><span class="p">,</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">BooleanType</span><span class="p">,</span> | 
|  | <span class="n">DoubleType</span><span class="p">,</span> | 
|  | <span class="n">LongType</span><span class="p">,</span> | 
|  | <span class="n">NumericType</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span>  <span class="c1"># For running doctests and reference resolution in PyCharm.</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">Axis</span><span class="p">,</span> | 
|  | <span class="n">DataFrameOrSeries</span><span class="p">,</span> | 
|  | <span class="n">Dtype</span><span class="p">,</span> | 
|  | <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">Label</span><span class="p">,</span> | 
|  | <span class="n">Name</span><span class="p">,</span> | 
|  | <span class="n">Scalar</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.indexing</span> <span class="kn">import</span> <span class="n">AtIndexer</span><span class="p">,</span> <span class="n">iAtIndexer</span><span class="p">,</span> <span class="n">iLocIndexer</span><span class="p">,</span> <span class="n">LocIndexer</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="n">InternalFrame</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.spark</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">SF</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.typedef</span> <span class="kn">import</span> <span class="n">spark_type_to_pandas_dtype</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">is_name_like_tuple</span><span class="p">,</span> | 
|  | <span class="n">is_name_like_value</span><span class="p">,</span> | 
|  | <span class="n">name_like_string</span><span class="p">,</span> | 
|  | <span class="n">scol_for</span><span class="p">,</span> | 
|  | <span class="n">sql_conf</span><span class="p">,</span> | 
|  | <span class="n">validate_arguments_and_invoke_function</span><span class="p">,</span> | 
|  | <span class="n">validate_axis</span><span class="p">,</span> | 
|  | <span class="n">validate_mode</span><span class="p">,</span> | 
|  | <span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">,</span> | 
|  | <span class="n">log_advice</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.indexes.base</span> <span class="kn">import</span> <span class="n">Index</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">GroupBy</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.window</span> <span class="kn">import</span> <span class="n">Rolling</span><span class="p">,</span> <span class="n">Expanding</span><span class="p">,</span> <span class="n">ExponentialMoving</span> | 
|  |  | 
|  |  | 
|  | <span class="n">bool_type</span> <span class="o">=</span> <span class="nb">bool</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">Frame</span><span class="p">(</span><span class="nb">object</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    The base class for both DataFrame and Series.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">Any</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_internal</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">InternalFrame</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_apply_series_op</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">op</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Series"</span><span class="p">],</span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Series"</span><span class="p">,</span> <span class="n">Column</span><span class="p">]],</span> | 
|  | <span class="n">should_resolve</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">sfun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">"Series"</span><span class="p">],</span> <span class="n">Column</span><span class="p">],</span> | 
|  | <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Series"</span><span class="p">,</span> <span class="n">Scalar</span><span class="p">]:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">dtypes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">,</span> <span class="n">Dtype</span><span class="p">]:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">]:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">]:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Index"</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_to_internal_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">]:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">head</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'axis' parameter</span> | 
|  | <span class="k">def</span> <span class="nf">cummin</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return cumulative minimum over a DataFrame or Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Returns a DataFrame or Series of the same size containing the cumulative minimum.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of cummin uses Spark's Window without</span> | 
|  | <span class="sd">            specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        skipna: boolean, default True</span> | 
|  | <span class="sd">            Exclude NA/null values. If an entire row/column is NA, the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.min: Return the minimum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummax: Return cumulative maximum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummin: Return cumulative minimum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumsum: Return cumulative sum over DataFrame axis.</span> | 
|  | <span class="sd">        Series.min: Return the minimum over Series axis.</span> | 
|  | <span class="sd">        Series.cummax: Return cumulative maximum over Series axis.</span> | 
|  | <span class="sd">        Series.cummin: Return cumulative minimum over Series axis.</span> | 
|  | <span class="sd">        Series.cumsum: Return cumulative sum over Series axis.</span> | 
|  | <span class="sd">        Series.cumprod: Return cumulative product over Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame([[2.0, 1.0], [3.0, None], [1.0, 0.0]], columns=list('AB'))</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  3.0  NaN</span> | 
|  | <span class="sd">        2  1.0  0.0</span> | 
|  |  | 
|  | <span class="sd">        By default, iterates over rows and finds the minimum in each column.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.cummin()</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  2.0  NaN</span> | 
|  | <span class="sd">        2  1.0  0.0</span> | 
|  |  | 
|  | <span class="sd">        It works identically in Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.A.cummin()</span> | 
|  | <span class="sd">        0    2.0</span> | 
|  | <span class="sd">        1    2.0</span> | 
|  | <span class="sd">        2    1.0</span> | 
|  | <span class="sd">        Name: A, dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span><span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">,</span> <span class="n">skipna</span><span class="p">),</span> <span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
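# A hedged, standalone sketch (added for illustration; not part of
# pyspark/pandas/generic.py): the note on ``cummin`` above says the cumulative
# functions use a Spark Window with an ordering but no partition specification,
# which pulls every row into a single partition. The snippet below only
# demonstrates that idea in plain PySpark; the ``_order`` helper column and the
# sample data are assumptions made for this sketch, not the library's internals.
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(2.0,), (3.0,), (1.0,)], ["A"])

# Materialize a monotonically increasing id so the running minimum follows the
# original row order.
sdf = sdf.withColumn("_order", F.monotonically_increasing_id())

# No partitionBy(): the whole dataset is evaluated as one ordered partition.
unpartitioned = Window.orderBy("_order").rowsBetween(
    Window.unboundedPreceding, Window.currentRow
)
sdf.select(F.min("A").over(unpartitioned).alias("cummin_A")).show()
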
|  | <span class="c1"># TODO: add 'axis' parameter</span> | 
|  | <span class="k">def</span> <span class="nf">cummax</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return cumulative maximum over a DataFrame or Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Returns a DataFrame or Series of the same size containing the cumulative maximum.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of cummax uses Spark's Window without</span> | 
|  | <span class="sd">            specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        skipna: boolean, default True</span> | 
|  | <span class="sd">            Exclude NA/null values. If an entire row/column is NA, the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.max: Return the maximum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummax: Return cumulative maximum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummin: Return cumulative minimum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumsum: Return cumulative sum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumprod: Return cumulative product over DataFrame axis.</span> | 
|  | <span class="sd">        Series.max: Return the maximum over Series axis.</span> | 
|  | <span class="sd">        Series.cummax: Return cumulative maximum over Series axis.</span> | 
|  | <span class="sd">        Series.cummin: Return cumulative minimum over Series axis.</span> | 
|  | <span class="sd">        Series.cumsum: Return cumulative sum over Series axis.</span> | 
|  | <span class="sd">        Series.cumprod: Return cumulative product over Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame([[2.0, 1.0], [3.0, None], [1.0, 0.0]], columns=list('AB'))</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  3.0  NaN</span> | 
|  | <span class="sd">        2  1.0  0.0</span> | 
|  |  | 
|  | <span class="sd">        By default, iterates over rows and finds the maximum in each column.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.cummax()</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  3.0  NaN</span> | 
|  | <span class="sd">        2  3.0  1.0</span> | 
|  |  | 
|  | <span class="sd">        It works identically in Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.B.cummax()</span> | 
|  | <span class="sd">        0    1.0</span> | 
|  | <span class="sd">        1    NaN</span> | 
|  | <span class="sd">        2    1.0</span> | 
|  | <span class="sd">        Name: B, dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span><span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">,</span> <span class="n">skipna</span><span class="p">),</span> <span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'axis' parameter</span> | 
|  | <span class="k">def</span> <span class="nf">cumsum</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return cumulative sum over a DataFrame or Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Returns a DataFrame or Series of the same size containing the cumulative sum.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of cumsum uses Spark's Window without</span> | 
|  | <span class="sd">            specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        skipna: boolean, default True</span> | 
|  | <span class="sd">            Exclude NA/null values. If an entire row/column is NA, the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.sum: Return the sum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummax: Return cumulative maximum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummin: Return cumulative minimum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumsum: Return cumulative sum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumprod: Return cumulative product over DataFrame axis.</span> | 
|  | <span class="sd">        Series.sum: Return the sum over Series axis.</span> | 
|  | <span class="sd">        Series.cummax: Return cumulative maximum over Series axis.</span> | 
|  | <span class="sd">        Series.cummin: Return cumulative minimum over Series axis.</span> | 
|  | <span class="sd">        Series.cumsum: Return cumulative sum over Series axis.</span> | 
|  | <span class="sd">        Series.cumprod: Return cumulative product over Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame([[2.0, 1.0], [3.0, None], [1.0, 0.0]], columns=list('AB'))</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  3.0  NaN</span> | 
|  | <span class="sd">        2  1.0  0.0</span> | 
|  |  | 
|  | <span class="sd">        By default, iterates over rows and finds the sum in each column.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.cumsum()</span> | 
|  | <span class="sd">             A    B</span> | 
|  | <span class="sd">        0  2.0  1.0</span> | 
|  | <span class="sd">        1  5.0  NaN</span> | 
|  | <span class="sd">        2  6.0  1.0</span> | 
|  |  | 
|  | <span class="sd">        It works identically in Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.A.cumsum()</span> | 
|  | <span class="sd">        0    2.0</span> | 
|  | <span class="sd">        1    5.0</span> | 
|  | <span class="sd">        2    6.0</span> | 
|  | <span class="sd">        Name: A, dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span><span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cumsum</span><span class="p">(</span><span class="n">skipna</span><span class="p">),</span> <span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'axis' parameter</span> | 
|  | <span class="k">def</span> <span class="nf">cumprod</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return cumulative product over a DataFrame or Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Returns a DataFrame or Series of the same size containing the cumulative product.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of cumprod uses Spark's Window without</span> | 
|  | <span class="sd">            specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: unlike pandas', pandas-on-Spark's emulates cumulative product by</span> | 
|  | <span class="sd">            ``exp(sum(log(...)))`` trick. Therefore, it only works for positive numbers.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        skipna: boolean, default True</span> | 
|  | <span class="sd">            Exclude NA/null values. If an entire row/column is NA, the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.cummax: Return cumulative maximum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cummin: Return cumulative minimum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumsum: Return cumulative sum over DataFrame axis.</span> | 
|  | <span class="sd">        DataFrame.cumprod: Return cumulative product over DataFrame axis.</span> | 
|  | <span class="sd">        Series.cummax: Return cumulative maximum over Series axis.</span> | 
|  | <span class="sd">        Series.cummin: Return cumulative minimum over Series axis.</span> | 
|  | <span class="sd">        Series.cumsum: Return cumulative sum over Series axis.</span> | 
|  | <span class="sd">        Series.cumprod: Return cumulative product over Series axis.</span> | 
|  |  | 
|  | <span class="sd">        Raises</span> | 
|  | <span class="sd">        ------</span> | 
|  | <span class="sd">        Exception: If the values is equal to or lower than 0.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame([[2.0, 1.0], [3.0, None], [4.0, 10.0]], columns=list('AB'))</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">             A     B</span> | 
|  | <span class="sd">        0  2.0   1.0</span> | 
|  | <span class="sd">        1  3.0   NaN</span> | 
|  | <span class="sd">        2  4.0  10.0</span> | 
|  |  | 
|  | <span class="sd">        By default, iterates over rows and finds the sum in each column.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.cumprod()</span> | 
|  | <span class="sd">              A     B</span> | 
|  | <span class="sd">        0   2.0   1.0</span> | 
|  | <span class="sd">        1   6.0   NaN</span> | 
|  | <span class="sd">        2  24.0  10.0</span> | 
|  |  | 
|  | <span class="sd">        It works identically in Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.A.cumprod()</span> | 
|  | <span class="sd">        0     2.0</span> | 
|  | <span class="sd">        1     6.0</span> | 
|  | <span class="sd">        2    24.0</span> | 
|  | <span class="sd">        Name: A, dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span><span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cumprod</span><span class="p">(</span><span class="n">skipna</span><span class="p">),</span> <span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
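# A hedged, standalone sketch (added for illustration; not part of
# pyspark/pandas/generic.py) of the ``exp(sum(log(...)))`` trick mentioned in the
# ``cumprod`` note: for positive values, a running product can be emulated by a
# log/sum/exp round trip over the same kind of unpartitioned window. The
# ``_order`` column and sample data are assumptions made for this sketch; the
# real implementation additionally handles ``skipna`` and raises on values that
# are equal to or lower than 0.
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(2.0,), (3.0,), (4.0,)], ["A"])
sdf = sdf.withColumn("_order", F.monotonically_increasing_id())

w = Window.orderBy("_order").rowsBetween(Window.unboundedPreceding, Window.currentRow)

# exp(sum(log(x))) over a running window equals the running product of x (x > 0).
sdf.select(F.exp(F.sum(F.log("A")).over(w)).alias("cumprod_A")).show()
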
|  | <span class="k">def</span> <span class="nf">pipe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Any</span><span class="p">],</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="n">Any</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sa">r</span><span class="sd">"""</span> | 
|  | <span class="sd">        Apply func(self, \*args, \*\*kwargs).</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        func: function</span> | 
|  | <span class="sd">            Function to apply to the DataFrame or Series.</span> | 
|  | <span class="sd">            ``args`` and ``kwargs`` are passed into ``func``.</span> | 
|  | <span class="sd">            Alternatively a ``(callable, data_keyword)`` tuple where</span> | 
|  | <span class="sd">            ``data_keyword`` is a string indicating the keyword of</span> | 
|  | <span class="sd">            ``callable`` that expects the data.</span> | 
|  | <span class="sd">        args: iterable, optional</span> | 
|  | <span class="sd">            positional arguments passed into ``func``.</span> | 
|  | <span class="sd">        kwargs: mapping, optional</span> | 
|  | <span class="sd">            a dictionary of keyword arguments passed into ``func``.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        object: the return type of ``func``.</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        Use ``.pipe`` when chaining together functions that expect</span> | 
|  | <span class="sd">        Series, DataFrames or GroupBy objects. For example, given</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'category': ['A', 'A', 'B'],</span> | 
|  | <span class="sd">        ...                    'col1': [1, 2, 3],</span> | 
|  | <span class="sd">        ...                    'col2': [4, 5, 6]},</span> | 
|  | <span class="sd">        ...                   columns=['category', 'col1', 'col2'])</span> | 
|  | <span class="sd">        >>> def keep_category_a(df):</span> | 
|  | <span class="sd">        ...     return df[df['category'] == 'A']</span> | 
|  | <span class="sd">        >>> def add_one(df, column):</span> | 
|  | <span class="sd">        ...     return df.assign(col3=df[column] + 1)</span> | 
|  | <span class="sd">        >>> def multiply(df, column1, column2):</span> | 
|  | <span class="sd">        ...     return df.assign(col4=df[column1] * df[column2])</span> | 
|  |  | 
|  |  | 
|  | <span class="sd">        instead of writing</span> | 
|  |  | 
|  | <span class="sd">        >>> multiply(add_one(keep_category_a(df), column="col1"), column1="col2", column2="col3")</span> | 
|  | <span class="sd">          category  col1  col2  col3  col4</span> | 
|  | <span class="sd">        0        A     1     4     2     8</span> | 
|  | <span class="sd">        1        A     2     5     3    15</span> | 
|  |  | 
|  |  | 
|  | <span class="sd">        You can write</span> | 
|  |  | 
|  | <span class="sd">        >>> (df.pipe(keep_category_a)</span> | 
|  | <span class="sd">        ...    .pipe(add_one, column="col1")</span> | 
|  | <span class="sd">        ...    .pipe(multiply, column1="col2", column2="col3")</span> | 
|  | <span class="sd">        ... )</span> | 
|  | <span class="sd">          category  col1  col2  col3  col4</span> | 
|  | <span class="sd">        0        A     1     4     2     8</span> | 
|  | <span class="sd">        1        A     2     5     3    15</span> | 
|  |  | 
|  |  | 
|  | <span class="sd">        If you have a function that takes the data as the second</span> | 
|  | <span class="sd">        argument, pass a tuple indicating which keyword expects the</span> | 
|  | <span class="sd">        data. For example, suppose ``f`` takes its data as ``df``:</span> | 
|  |  | 
|  | <span class="sd">        >>> def multiply_2(column1, df, column2):</span> | 
|  | <span class="sd">        ...     return df.assign(col4=df[column1] * df[column2])</span> | 
|  |  | 
|  |  | 
|  | <span class="sd">        Then you can write</span> | 
|  |  | 
|  | <span class="sd">        >>> (df.pipe(keep_category_a)</span> | 
|  | <span class="sd">        ...    .pipe(add_one, column="col1")</span> | 
|  | <span class="sd">        ...    .pipe((multiply_2, 'df'), column1="col2", column2="col3")</span> | 
|  | <span class="sd">        ... )</span> | 
|  | <span class="sd">          category  col1  col2  col3  col4</span> | 
|  | <span class="sd">        0        A     1     4     2     8</span> | 
|  | <span class="sd">        1        A     2     5     3    15</span> | 
|  |  | 
|  | <span class="sd">        You can use a lambda as well</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([1, 2, 3]).pipe(lambda x: (x + 1).rename("value"))</span> | 
|  | <span class="sd">        0    2</span> | 
|  | <span class="sd">        1    3</span> | 
|  | <span class="sd">        2    4</span> | 
|  | <span class="sd">        Name: value, dtype: int64</span> | 
|  | <span class="sd">        """</span> | 
|  |  | 
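|  | <span class="c1"># When func is a (callable, data_keyword) tuple, inject self as that keyword argument before calling it.</span> | 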
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span> | 
|  | <span class="n">func</span><span class="p">,</span> <span class="n">target</span> <span class="o">=</span> <span class="n">func</span> | 
|  | <span class="k">if</span> <span class="n">target</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2"> is both the pipe target and a keyword "</span> <span class="s2">"argument"</span> <span class="o">%</span> <span class="n">target</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span><span class="p">[</span><span class="n">target</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="k">return</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_numpy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        A NumPy ndarray representing the values in this DataFrame or Series.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This method should only be used if the resulting NumPy ndarray is expected</span> | 
|  | <span class="sd">            to be small, as all the data is loaded into the driver's memory.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        numpy.ndarray</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> ps.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy()</span> | 
|  | <span class="sd">        array([[1, 3],</span> | 
|  | <span class="sd">               [2, 4]])</span> | 
|  |  | 
|  | <span class="sd">        With heterogeneous data, the lowest common type will have to be used.</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}).to_numpy()</span> | 
|  | <span class="sd">        array([[1. , 3. ],</span> | 
|  | <span class="sd">               [2. , 4.5]])</span> | 
|  |  | 
|  | <span class="sd">        For a mix of numeric and non-numeric types, the output array will have object dtype.</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({"A": [1, 2], "B": [3.0, 4.5], "C": pd.date_range('2000', periods=2)})</span> | 
|  | <span class="sd">        >>> df.to_numpy()</span> | 
|  | <span class="sd">        array([[1, 3.0, Timestamp('2000-01-01 00:00:00')],</span> | 
|  | <span class="sd">               [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object)</span> | 
|  |  | 
|  | <span class="sd">        For Series,</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series(['a', 'b', 'a']).to_numpy()</span> | 
|  | <span class="sd">        array(['a', 'b', 'a'], dtype=object)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">log_advice</span><span class="p">(</span> | 
|  | <span class="s2">"`to_numpy` loads all data into the driver's memory. "</span> | 
|  | <span class="s2">"It should only be used if the resulting NumPy ndarray is expected to be small."</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">values</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">values</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return a Numpy representation of the DataFrame or the Series.</span> | 
|  |  | 
|  | <span class="sd">        .. warning:: We recommend using `DataFrame.to_numpy()` or `Series.to_numpy()` instead.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This method should only be used if the resulting NumPy ndarray is expected</span> | 
|  | <span class="sd">            to be small, as all the data is loaded into the driver's memory.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        numpy.ndarray</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        A DataFrame where all columns are the same type (e.g., int64) results in an array of</span> | 
|  | <span class="sd">        the same type.</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'age':    [ 3,  29],</span> | 
|  | <span class="sd">        ...                    'height': [94, 170],</span> | 
|  | <span class="sd">        ...                    'weight': [31, 115]})</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">           age  height  weight</span> | 
|  | <span class="sd">        0    3      94      31</span> | 
|  | <span class="sd">        1   29     170     115</span> | 
|  | <span class="sd">        >>> df.dtypes</span> | 
|  | <span class="sd">        age       int64</span> | 
|  | <span class="sd">        height    int64</span> | 
|  | <span class="sd">        weight    int64</span> | 
|  | <span class="sd">        dtype: object</span> | 
|  | <span class="sd">        >>> df.values</span> | 
|  | <span class="sd">        array([[  3,  94,  31],</span> | 
|  | <span class="sd">               [ 29, 170, 115]])</span> | 
|  |  | 
|  | <span class="sd">        A DataFrame with mixed type columns (e.g., str/object, int64, float32) results in an ndarray</span> | 
|  | <span class="sd">        of the broadest type that accommodates these mixed types (e.g., object).</span> | 
|  |  | 
|  | <span class="sd">        >>> df2 = ps.DataFrame([('parrot',   24.0, 'second'),</span> | 
|  | <span class="sd">        ...                     ('lion',     80.5, 'first'),</span> | 
|  | <span class="sd">        ...                     ('monkey', np.nan, None)],</span> | 
|  | <span class="sd">        ...                   columns=('name', 'max_speed', 'rank'))</span> | 
|  | <span class="sd">        >>> df2.dtypes</span> | 
|  | <span class="sd">        name          object</span> | 
|  | <span class="sd">        max_speed    float64</span> | 
|  | <span class="sd">        rank          object</span> | 
|  | <span class="sd">        dtype: object</span> | 
|  | <span class="sd">        >>> df2.values</span> | 
|  | <span class="sd">        array([['parrot', 24.0, 'second'],</span> | 
|  | <span class="sd">               ['lion', 80.5, 'first'],</span> | 
|  | <span class="sd">               ['monkey', nan, None]], dtype=object)</span> | 
|  |  | 
|  | <span class="sd">        For Series,</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([1, 2, 3]).values</span> | 
|  | <span class="sd">        array([1, 2, 3])</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series(list('aabc')).values</span> | 
|  | <span class="sd">        array(['a', 'a', 'b', 'c'], dtype=object)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">"We recommend using `</span><span class="si">{}</span><span class="s2">.to_numpy()` instead."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_csv</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">sep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">","</span><span class="p">,</span> | 
|  | <span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> | 
|  | <span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">quotechar</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">'"'</span><span class="p">,</span> | 
|  | <span class="n">date_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">escapechar</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">num_files</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"w"</span><span class="p">,</span> | 
|  | <span class="n">partition_cols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">index_col</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sa">r</span><span class="sd">"""</span> | 
|  | <span class="sd">        Write object to a comma-separated values (csv) file.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: pandas-on-Spark `to_csv` writes files to a path or URI. Unlike pandas',</span> | 
|  | <span class="sd">            pandas-on-Spark respects HDFS properties such as 'fs.default.name'.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: pandas-on-Spark writes CSV files into the directory `path`, producing</span> | 
|  | <span class="sd">            multiple `part-...` files in that directory when `path` is specified.</span> | 
|  | <span class="sd">            This behavior is inherited from Apache Spark. The number of partitions can</span> | 
|  | <span class="sd">            be controlled by `num_files`, which is deprecated; use</span> | 
|  | <span class="sd">            `DataFrame.spark.repartition` instead.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        path: str, default None</span> | 
|  | <span class="sd">            File path. If None is provided the result is returned as a string.</span> | 
|  | <span class="sd">        sep: str, default ','</span> | 
|  | <span class="sd">            String of length 1. Field delimiter for the output file.</span> | 
|  | <span class="sd">        na_rep: str, default ''</span> | 
|  | <span class="sd">            Missing data representation.</span> | 
|  | <span class="sd">        columns: sequence, optional</span> | 
|  | <span class="sd">            Columns to write.</span> | 
|  | <span class="sd">        header: bool or list of str, default True</span> | 
|  | <span class="sd">            Write out the column names. If a list of strings is given it is</span> | 
|  | <span class="sd">            assumed to be aliases for the column names.</span> | 
|  | <span class="sd">        quotechar: str, default '\"'</span> | 
|  | <span class="sd">            String of length 1. Character used to quote fields.</span> | 
|  | <span class="sd">        date_format: str, default None</span> | 
|  | <span class="sd">            Format string for datetime objects.</span> | 
|  | <span class="sd">        escapechar: str, default None</span> | 
|  | <span class="sd">            String of length 1. Character used to escape `sep` and `quotechar`</span> | 
|  | <span class="sd">            when appropriate.</span> | 
|  | <span class="sd">        num_files: the number of partitions to be written in the `path` directory when</span> | 
|  | <span class="sd">            `path` is specified. This is deprecated; use `DataFrame.spark.repartition` instead.</span> | 
|  | <span class="sd">        mode: str</span> | 
|  | <span class="sd">            Python write mode, default 'w'.</span> | 
|  |  | 
|  | <span class="sd">            .. note:: mode can accept the strings for Spark writing mode.</span> | 
|  | <span class="sd">                Such as 'append', 'overwrite', 'ignore', 'error', 'errorifexists'.</span> | 
|  |  | 
|  | <span class="sd">                - 'append' (equivalent to 'a'): Append the new data to existing data.</span> | 
|  | <span class="sd">                - 'overwrite' (equivalent to 'w'): Overwrite existing data.</span> | 
|  | <span class="sd">                - 'ignore': Silently ignore this operation if data already exists.</span> | 
|  | <span class="sd">                - 'error' or 'errorifexists': Throw an exception if data already exists.</span> | 
|  |  | 
|  | <span class="sd">        partition_cols: str or list of str, optional, default None</span> | 
|  | <span class="sd">            Names of partitioning columns (see the last example below).</span> | 
|  | <span class="sd">        index_col: str or list of str, optional, default: None</span> | 
|  | <span class="sd">            Column names to be used in Spark to represent pandas-on-Spark's index. The index name</span> | 
|  | <span class="sd">            in pandas-on-Spark is ignored. By default, the index is always lost.</span> | 
|  | <span class="sd">        options: keyword arguments for additional options specific to PySpark.</span> | 
|  | <span class="sd">            These kwargs are passed through to PySpark's CSV writer; check</span> | 
|  | <span class="sd">            the options in PySpark's API documentation for spark.write.csv(...).</span> | 
|  | <span class="sd">            They take higher priority and overwrite all other options.</span> | 
|  | <span class="sd">            This parameter only works when `path` is specified.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        str or None</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        read_csv</span> | 
|  | <span class="sd">        DataFrame.to_delta</span> | 
|  | <span class="sd">        DataFrame.to_table</span> | 
|  | <span class="sd">        DataFrame.to_parquet</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame(dict(</span> | 
|  | <span class="sd">        ...    date=list(pd.date_range('2012-1-1 12:00:00', periods=3, freq='M')),</span> | 
|  | <span class="sd">        ...    country=['KR', 'US', 'JP'],</span> | 
|  | <span class="sd">        ...    code=[1, 2 ,3]), columns=['date', 'country', 'code'])</span> | 
|  | <span class="sd">        >>> df.sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">                           date country  code</span> | 
|  | <span class="sd">        ... 2012-01-31 12:00:00      KR     1</span> | 
|  | <span class="sd">        ... 2012-02-29 12:00:00      US     2</span> | 
|  | <span class="sd">        ... 2012-03-31 12:00:00      JP     3</span> | 
|  |  | 
|  | <span class="sd">        >>> print(df.to_csv())  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">        date,country,code</span> | 
|  | <span class="sd">        2012-01-31 12:00:00,KR,1</span> | 
|  | <span class="sd">        2012-02-29 12:00:00,US,2</span> | 
|  | <span class="sd">        2012-03-31 12:00:00,JP,3</span> | 
|  |  | 
|  | <span class="sd">        >>> df.cummax().to_csv(path=r'%s/to_csv/foo.csv' % path, num_files=1)</span> | 
|  | <span class="sd">        >>> ps.read_csv(</span> | 
|  | <span class="sd">        ...    path=r'%s/to_csv/foo.csv' % path</span> | 
|  | <span class="sd">        ... ).sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">                           date country  code</span> | 
|  | <span class="sd">        ... 2012-01-31 12:00:00      KR     1</span> | 
|  | <span class="sd">        ... 2012-02-29 12:00:00      US     2</span> | 
|  | <span class="sd">        ... 2012-03-31 12:00:00      US     3</span> | 
|  |  | 
|  | <span class="sd">        In the case of a Series,</span> | 
|  |  | 
|  | <span class="sd">        >>> print(df.date.to_csv())  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">        date</span> | 
|  | <span class="sd">        2012-01-31 12:00:00</span> | 
|  | <span class="sd">        2012-02-29 12:00:00</span> | 
|  | <span class="sd">        2012-03-31 12:00:00</span> | 
|  |  | 
|  | <span class="sd">        >>> df.date.to_csv(path=r'%s/to_csv/foo.csv' % path, num_files=1)</span> | 
|  | <span class="sd">        >>> ps.read_csv(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_csv/foo.csv' % path</span> | 
|  | <span class="sd">        ... ).sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">                           date</span> | 
|  | <span class="sd">        ... 2012-01-31 12:00:00</span> | 
|  | <span class="sd">        ... 2012-02-29 12:00:00</span> | 
|  | <span class="sd">        ... 2012-03-31 12:00:00</span> | 
|  |  | 
|  | <span class="sd">        You can preserve the index in the roundtrip as below.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.set_index("country", append=True, inplace=True)</span> | 
|  | <span class="sd">        >>> df.date.to_csv(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_csv/bar.csv' % path,</span> | 
|  | <span class="sd">        ...     num_files=1,</span> | 
|  | <span class="sd">        ...     index_col=["index1", "index2"])</span> | 
|  | <span class="sd">        >>> ps.read_csv(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_csv/bar.csv' % path, index_col=["index1", "index2"]</span> | 
|  | <span class="sd">        ... ).sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">                                     date</span> | 
|  | <span class="sd">        index1 index2</span> | 
|  | <span class="sd">        ...    ...    2012-01-31 12:00:00</span> | 
|  | <span class="sd">        ...    ...    2012-02-29 12:00:00</span> | 
|  | <span class="sd">        ...    ...    2012-03-31 12:00:00</span> | 
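|  |  | 
|  | <span class="sd">        The Spark-specific parameters can also be combined in one call; a minimal sketch</span> | 
|  | <span class="sd">        reusing the temporary ``path`` above (the file name, write mode and partitioning</span> | 
|  | <span class="sd">        column here are illustrative only):</span> | 
|  |  | 
|  | <span class="sd">        >>> df.to_csv(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_csv/partitioned.csv' % path,</span> | 
|  | <span class="sd">        ...     mode='overwrite',</span> | 
|  | <span class="sd">        ...     partition_cols='code',</span> | 
|  | <span class="sd">        ...     num_files=1)  # doctest: +SKIP</span> | 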
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="s2">"options"</span> <span class="ow">in</span> <span class="n">options</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"options"</span><span class="p">),</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">options</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">options</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"options"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="c1"># If path is none, just collect and use pandas's to_csv.</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span> | 
|  | <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">sep</span><span class="o">=</span><span class="n">sep</span><span class="p">,</span> | 
|  | <span class="n">na_rep</span><span class="o">=</span><span class="n">na_rep</span><span class="p">,</span> | 
|  | <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">,</span> | 
|  | <span class="n">header</span><span class="o">=</span><span class="n">header</span><span class="p">,</span> | 
|  | <span class="n">quotechar</span><span class="o">=</span><span class="n">quotechar</span><span class="p">,</span> | 
|  | <span class="n">date_format</span><span class="o">=</span><span class="n">date_format</span><span class="p">,</span> | 
|  | <span class="n">escapechar</span><span class="o">=</span><span class="n">escapechar</span><span class="p">,</span> | 
|  | <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">)</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span> | 
|  |  | 
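|  | <span class="c1"># Resolve the requested columns into internal column labels, raising KeyError for unknown names.</span> | 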
|  | <span class="k">if</span> <span class="n">columns</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">column_labels</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">column_labels</span> <span class="o">=</span> <span class="p">[]</span> | 
|  | <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span> | 
|  | <span class="k">if</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">col</span><span class="p">):</span> | 
|  | <span class="n">label</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">label</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="p">(</span><span class="n">col</span><span class="p">,))</span> | 
|  | <span class="k">if</span> <span class="n">label</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="n">label</span><span class="p">))</span> | 
|  | <span class="n">column_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index_col</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> | 
|  | <span class="n">index_cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">index_col</span><span class="p">]</span> | 
|  | <span class="k">elif</span> <span class="n">index_col</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">index_cols</span> <span class="o">=</span> <span class="p">[]</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">index_cols</span> <span class="o">=</span> <span class="n">index_col</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">header</span> <span class="ow">is</span> <span class="kc">True</span> <span class="ow">and</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels_level</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"to_csv only support one-level index column now"</span><span class="p">)</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">header</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span> | 
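|  | <span class="c1"># A list-valued header supplies column aliases: alias the selected Spark columns before writing.</span> | 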
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">to_spark</span><span class="p">(</span><span class="n">index_col</span><span class="p">)</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> | 
|  | <span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">label</span><span class="p">))</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">index_cols</span><span class="p">]</span> | 
|  | <span class="o">+</span> <span class="p">[</span> | 
|  | <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">if</span> <span class="n">label</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">label</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span> | 
|  | <span class="n">new_name</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">label</span><span class="p">,</span> <span class="n">new_name</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">column_labels</span><span class="p">,</span> <span class="n">header</span><span class="p">))</span> | 
|  | <span class="p">]</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">header</span> <span class="o">=</span> <span class="kc">True</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">to_spark</span><span class="p">(</span><span class="n">index_col</span><span class="p">)</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span> | 
|  | <span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">label</span><span class="p">))</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">index_cols</span><span class="p">]</span> | 
|  | <span class="o">+</span> <span class="p">[</span> | 
|  | <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">if</span> <span class="n">label</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">label</span><span class="p">))</span> | 
|  | <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">column_labels</span><span class="p">)</span> | 
|  | <span class="p">]</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">num_files</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"`num_files` has been deprecated and might be removed in a future version. "</span> | 
|  | <span class="s2">"Use `DataFrame.spark.repartition` instead."</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">repartition</span><span class="p">(</span><span class="n">num_files</span><span class="p">)</span> | 
|  |  | 
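|  | <span class="c1"># Map the pandas-style arguments onto Spark CSV writer options and save to the target path.</span> | 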
|  | <span class="n">mode</span> <span class="o">=</span> <span class="n">validate_mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> | 
|  | <span class="n">builder</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">write</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">partition_cols</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partition_cols</span><span class="p">)</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span> | 
|  | <span class="n">sep</span><span class="o">=</span><span class="n">sep</span><span class="p">,</span> | 
|  | <span class="n">nullValue</span><span class="o">=</span><span class="n">na_rep</span><span class="p">,</span> | 
|  | <span class="n">header</span><span class="o">=</span><span class="n">header</span><span class="p">,</span> | 
|  | <span class="n">quote</span><span class="o">=</span><span class="n">quotechar</span><span class="p">,</span> | 
|  | <span class="n">dateFormat</span><span class="o">=</span><span class="n">date_format</span><span class="p">,</span> | 
|  | <span class="n">charToEscapeQuoteEscaping</span><span class="o">=</span><span class="n">escapechar</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="o">**</span><span class="n">options</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">"csv"</span><span class="p">)</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_json</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">compression</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"uncompressed"</span><span class="p">,</span> | 
|  | <span class="n">num_files</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"w"</span><span class="p">,</span> | 
|  | <span class="n">orient</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"records"</span><span class="p">,</span> | 
|  | <span class="n">lines</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">partition_cols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">index_col</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="o">**</span><span class="n">options</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Convert the object to a JSON string.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: pandas-on-Spark `to_json` writes files to a path or URI. Unlike pandas',</span> | 
|  | <span class="sd">            pandas-on-Spark respects HDFS properties such as 'fs.default.name'.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: pandas-on-Spark writes JSON files into the directory `path`, producing</span> | 
|  | <span class="sd">            multiple `part-...` files in that directory when `path` is specified.</span> | 
|  | <span class="sd">            This behavior is inherited from Apache Spark. The number of partitions can</span> | 
|  | <span class="sd">            be controlled by `num_files`, which is deprecated; use</span> | 
|  | <span class="sd">            `DataFrame.spark.repartition` instead.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the output JSON format is different from pandas'. It always uses `orient='records'`</span> | 
|  | <span class="sd">            for its output. This behavior might change in the future.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: Set the `ignoreNullFields` keyword argument to `True` to omit `None` or `NaN`</span> | 
|  | <span class="sd">            values when writing JSON objects. It works only when `path` is provided</span> | 
|  | <span class="sd">            (see the last example below).</span> | 
|  |  | 
|  | <span class="sd">        Note that NaN and None will be converted to null, and datetime objects</span> | 
|  | <span class="sd">        will be converted to UNIX timestamps.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        path: string, optional</span> | 
|  | <span class="sd">            File path. If not specified, the result is returned as</span> | 
|  | <span class="sd">            a string.</span> | 
|  | <span class="sd">        lines: bool, default True</span> | 
|  | <span class="sd">            If 'orient' is 'records', write out line-delimited JSON format.</span> | 
|  | <span class="sd">            A ValueError is thrown for an incorrect 'orient' since the others are</span> | 
|  | <span class="sd">            not list-like. It should always be True for now.</span> | 
|  | <span class="sd">        orient: str, default 'records'</span> | 
|  | <span class="sd">            It should always be 'records' for now.</span> | 
|  | <span class="sd">        compression: {'gzip', 'bz2', 'xz', None}</span> | 
|  | <span class="sd">            A string representing the compression to use in the output file,</span> | 
|  | <span class="sd">            only used when the first argument is a filename. By default, the</span> | 
|  | <span class="sd">            compression is inferred from the filename.</span> | 
|  | <span class="sd">        num_files: the number of partitions to be written in `path` directory when</span> | 
|  | <span class="sd">            this is a path. This is deprecated. Use `DataFrame.spark.repartition` instead.</span> | 
|  | <span class="sd">        mode: str</span> | 
|  | <span class="sd">            Python write mode, default 'w'.</span> | 
|  |  | 
|  | <span class="sd">            .. note:: mode can accept the strings for Spark writing mode.</span> | 
|  | <span class="sd">                Such as 'append', 'overwrite', 'ignore', 'error', 'errorifexists'.</span> | 
|  |  | 
|  | <span class="sd">                - 'append' (equivalent to 'a'): Append the new data to existing data.</span> | 
|  | <span class="sd">                - 'overwrite' (equivalent to 'w'): Overwrite existing data.</span> | 
|  | <span class="sd">                - 'ignore': Silently ignore this operation if data already exists.</span> | 
|  | <span class="sd">                - 'error' or 'errorifexists': Throw an exception if data already exists.</span> | 
|  |  | 
|  | <span class="sd">        partition_cols: str or list of str, optional, default None</span> | 
|  | <span class="sd">            Names of partitioning columns</span> | 
|  | <span class="sd">        index_col: str or list of str, optional, default: None</span> | 
|  | <span class="sd">            Column names to be used in Spark to represent pandas-on-Spark's index. The index name</span> | 
|  | <span class="sd">            in pandas-on-Spark is ignored. By default, the index is always lost.</span> | 
|  | <span class="sd">        options: keyword arguments for additional options specific to PySpark.</span> | 
|  | <span class="sd">            These kwargs are passed through to PySpark's JSON writer; check</span> | 
|  | <span class="sd">            the options in PySpark's API documentation for `spark.write.json(...)`.</span> | 
|  | <span class="sd">            They take higher priority and overwrite all other options.</span> | 
|  | <span class="sd">            This parameter only works when `path` is specified.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        str or None</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame([['a', 'b'], ['c', 'd']],</span> | 
|  | <span class="sd">        ...                   columns=['col 1', 'col 2'])</span> | 
|  | <span class="sd">        >>> df.to_json()</span> | 
|  | <span class="sd">        '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'</span> | 
|  |  | 
|  | <span class="sd">        >>> df['col 1'].to_json()</span> | 
|  | <span class="sd">        '[{"col 1":"a"},{"col 1":"c"}]'</span> | 
|  |  | 
|  | <span class="sd">        >>> df.to_json(path=r'%s/to_json/foo.json' % path, num_files=1)</span> | 
|  | <span class="sd">        >>> ps.read_json(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_json/foo.json' % path</span> | 
|  | <span class="sd">        ... ).sort_values(by="col 1")</span> | 
|  | <span class="sd">          col 1 col 2</span> | 
|  | <span class="sd">        0     a     b</span> | 
|  | <span class="sd">        1     c     d</span> | 
|  |  | 
|  | <span class="sd">        >>> df['col 1'].to_json(path=r'%s/to_json/foo.json' % path, num_files=1, index_col="index")</span> | 
|  | <span class="sd">        >>> ps.read_json(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_json/foo.json' % path, index_col="index"</span> | 
|  | <span class="sd">        ... ).sort_values(by="col 1")  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">              col 1</span> | 
|  | <span class="sd">        index</span> | 
|  | <span class="sd">        0         a</span> | 
|  | <span class="sd">        1         c</span> | 
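|  |  | 
|  | <span class="sd">        A minimal sketch of passing a Spark JSON option through ``options`` (the file name is</span> | 
|  | <span class="sd">        illustrative only); `ignoreNullFields` drops null fields from the written records:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.to_json(</span> | 
|  | <span class="sd">        ...     path=r'%s/to_json/baz.json' % path,</span> | 
|  | <span class="sd">        ...     num_files=1,</span> | 
|  | <span class="sd">        ...     ignoreNullFields=True)  # doctest: +SKIP</span> | 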
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="s2">"options"</span> <span class="ow">in</span> <span class="n">options</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"options"</span><span class="p">),</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">options</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">options</span> <span class="o">=</span> <span class="n">options</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"options"</span><span class="p">)</span> | 
|  |  | 
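|  | <span class="c1"># Keep null fields in the output by default; user-supplied options take precedence.</span> | 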
|  | <span class="n">default_options</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"ignoreNullFields"</span><span class="p">:</span> <span class="kc">False</span><span class="p">}</span> | 
|  | <span class="n">options</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">default_options</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">}</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="n">lines</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"lines=False is not implemented yet."</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">orient</span> <span class="o">!=</span> <span class="s2">"records"</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">"orient='records' is supported only for now."</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="c1"># If path is none, just collect and use pandas's to_json.</span> | 
|  | <span class="n">psdf_or_ser</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="n">pdf</span> <span class="o">=</span> <span class="n">psdf_or_ser</span><span class="o">.</span><span class="n">_to_pandas</span><span class="p">()</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">pdf</span> <span class="o">=</span> <span class="n">pdf</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span> | 
|  | <span class="c1"># To make the format consistent and readable by `read_json`, convert it to pandas' and</span> | 
|  | <span class="c1"># use 'records' orient for now.</span> | 
|  | <span class="k">return</span> <span class="n">pdf</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">"records"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">)</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">to_spark</span><span class="p">(</span><span class="n">index_col</span><span class="o">=</span><span class="n">index_col</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">num_files</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"`num_files` has been deprecated and might be removed in a future version. "</span> | 
|  | <span class="s2">"Use `DataFrame.spark.repartition` instead."</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">repartition</span><span class="p">(</span><span class="n">num_files</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">mode</span> <span class="o">=</span> <span class="n">validate_mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> | 
|  | <span class="n">builder</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">write</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">partition_cols</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="n">partition_cols</span><span class="p">)</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">_set_opts</span><span class="p">(</span><span class="n">compression</span><span class="o">=</span><span class="n">compression</span><span class="p">)</span> | 
|  | <span class="n">builder</span><span class="o">.</span><span class="n">options</span><span class="p">(</span><span class="o">**</span><span class="n">options</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">"json"</span><span class="p">)</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_excel</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">excel_writer</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">],</span> | 
|  | <span class="n">sheet_name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"Sheet1"</span><span class="p">,</span> | 
|  | <span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> | 
|  | <span class="n">float_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">index_label</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">startrow</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="n">startcol</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="n">engine</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">merge_cells</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inf_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"inf"</span><span class="p">,</span> | 
|  | <span class="n">freeze_panes</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Write object to an Excel sheet.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This method should only be used if the resulting DataFrame is expected</span> | 
|  | <span class="sd">                  to be small, as all the data is loaded into the driver's memory.</span> | 
|  |  | 
|  | <span class="sd">        To write a single object to an Excel .xlsx file it is only necessary to</span> | 
|  | <span class="sd">        specify a target file name. To write to multiple sheets it is necessary to</span> | 
|  | <span class="sd">        create an `ExcelWriter` object with a target file name, and specify a sheet</span> | 
|  | <span class="sd">        in the file to write to.</span> | 
|  |  | 
|  | <span class="sd">        Multiple sheets may be written to by specifying unique `sheet_name`.</span> | 
|  | <span class="sd">        With all data written to the file it is necessary to save the changes.</span> | 
|  | <span class="sd">        Note that creating an `ExcelWriter` object with a file name that already</span> | 
|  | <span class="sd">        exists will result in the contents of the existing file being erased.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        excel_writer: str or ExcelWriter object</span> | 
|  | <span class="sd">            File path or existing ExcelWriter.</span> | 
|  | <span class="sd">        sheet_name: str, default 'Sheet1'</span> | 
|  | <span class="sd">            Name of sheet which will contain DataFrame.</span> | 
|  | <span class="sd">        na_rep: str, default ''</span> | 
|  | <span class="sd">            Missing data representation.</span> | 
|  | <span class="sd">        float_format: str, optional</span> | 
|  | <span class="sd">            Format string for floating point numbers. For example</span> | 
|  | <span class="sd">            ``float_format="%%.2f"`` will format 0.1234 to 0.12.</span> | 
|  | <span class="sd">        columns: sequence or list of str, optional</span> | 
|  | <span class="sd">            Columns to write.</span> | 
|  | <span class="sd">        header: bool or list of str, default True</span> | 
|  | <span class="sd">            Write out the column names. If a list of string is given it is</span> | 
|  | <span class="sd">            assumed to be aliases for the column names.</span> | 
|  | <span class="sd">        index: bool, default True</span> | 
|  | <span class="sd">            Write row names (index).</span> | 
|  | <span class="sd">        index_label: str or sequence, optional</span> | 
|  | <span class="sd">            Column label for index column(s) if desired. If not specified, and</span> | 
|  | <span class="sd">            `header` and `index` are True, then the index names are used. A</span> | 
|  | <span class="sd">            sequence should be given if the DataFrame uses MultiIndex.</span> | 
|  | <span class="sd">        startrow: int, default 0</span> | 
|  | <span class="sd">            Upper left cell row to dump data frame.</span> | 
|  | <span class="sd">        startcol: int, default 0</span> | 
|  | <span class="sd">            Upper left cell column to dump data frame.</span> | 
|  | <span class="sd">        engine: str, optional</span> | 
|  | <span class="sd">            Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this</span> | 
|  | <span class="sd">            via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and</span> | 
|  | <span class="sd">            ``io.excel.xlsm.writer``.</span> | 
|  | <span class="sd">        merge_cells: bool, default True</span> | 
|  | <span class="sd">            Write MultiIndex and Hierarchical Rows as merged cells.</span> | 
|  | <span class="sd">        inf_rep: str, default 'inf'</span> | 
|  | <span class="sd">            Representation for infinity (there is no native representation for</span> | 
|  | <span class="sd">            infinity in Excel).</span> | 
|  | <span class="sd">        freeze_panes: tuple of int (length 2), optional</span> | 
|  | <span class="sd">            Specifies the one-based bottommost row and rightmost column that</span> | 
|  | <span class="sd">            is to be frozen.</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        Once a workbook has been saved it is not possible write further data</span> | 
|  | <span class="sd">        without rewriting the whole workbook.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        read_excel: Read Excel file.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        Create, write to, and save a workbook:</span> | 
|  |  | 
|  | <span class="sd">        >>> df1 = ps.DataFrame([['a', 'b'], ['c', 'd']],</span> | 
|  | <span class="sd">        ...                    index=['row 1', 'row 2'],</span> | 
|  | <span class="sd">        ...                    columns=['col 1', 'col 2'])</span> | 
|  | <span class="sd">        >>> df1.to_excel("output.xlsx")  # doctest: +SKIP</span> | 
|  |  | 
|  | <span class="sd">        To specify the sheet name:</span> | 
|  |  | 
|  | <span class="sd">        >>> df1.to_excel("output.xlsx")  # doctest: +SKIP</span> | 
|  | <span class="sd">        >>> df1.to_excel("output.xlsx",</span> | 
|  | <span class="sd">        ...              sheet_name='Sheet_name_1')  # doctest: +SKIP</span> | 
|  |  | 
|  | <span class="sd">        If you wish to write to more than one sheet in the workbook, it is</span> | 
|  | <span class="sd">        necessary to specify an ExcelWriter object:</span> | 
|  |  | 
|  | <span class="sd">        >>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP</span> | 
|  | <span class="sd">        ...      df1.to_excel(writer, sheet_name='Sheet_name_1')</span> | 
|  | <span class="sd">        ...      df2.to_excel(writer, sheet_name='Sheet_name_2')</span> | 
|  |  | 
|  | <span class="sd">        To set the library that is used to write the Excel file,</span> | 
|  | <span class="sd">        you can pass the `engine` keyword (the default engine is</span> | 
|  | <span class="sd">        automatically chosen depending on the file extension):</span> | 
|  |  | 
|  | <span class="sd">        >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')  # doctest: +SKIP</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">log_advice</span><span class="p">(</span> | 
|  | <span class="s2">"`to_excel` loads all data into the driver's memory. "</span> | 
|  | <span class="s2">"It should only be used if the resulting DataFrame is expected to be small."</span> | 
|  | <span class="p">)</span> | 
|  | <span class="c1"># Make sure locals() call is at the top of the function so we don't capture local variables.</span> | 
|  | <span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span> | 
|  |  | 
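|  | # Dispatch to the matching pandas writer: the pandas-on-Spark object is collected | 
|  | # into a plain pandas DataFrame/Series on the driver (hence the advice above), and | 
|  | # the validated keyword arguments are forwarded to pandas' own ``to_excel``. | 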
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">f</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">to_excel</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">f</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_excel</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Constructor expects DataFrame or Series; however, "</span> <span class="s2">"got [</span><span class="si">%s</span><span class="s2">]"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="p">,)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span> | 
|  | <span class="n">psdf</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_excel</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">args</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_hdf</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">path_or_buf</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">HDFStore</span><span class="p">],</span> | 
|  | <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> | 
|  | <span class="n">mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"a"</span><span class="p">,</span> | 
|  | <span class="n">complevel</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">complib</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">append</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="nb">format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">min_itemsize</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">nan_rep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">dropna</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">data_columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">errors</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"strict"</span><span class="p">,</span> | 
|  | <span class="n">encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"UTF-8"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Write the contained data to an HDF5 file using HDFStore.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This method should only be used if the resulting DataFrame is expected</span> | 
|  | <span class="sd">                  to be small, as all the data is loaded into the driver's memory.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 4.0.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        path_or_buf : str or pandas.HDFStore</span> | 
|  | <span class="sd">            File path or HDFStore object.</span> | 
|  | <span class="sd">        key : str</span> | 
|  | <span class="sd">            Identifier for the group in the store.</span> | 
|  | <span class="sd">        mode : {'a', 'w', 'r+'}, default 'a'</span> | 
|  | <span class="sd">            Mode to open file:</span> | 
|  |  | 
|  | <span class="sd">            - 'w': write, a new file is created (an existing file with</span> | 
|  | <span class="sd">              the same name would be deleted).</span> | 
|  | <span class="sd">            - 'a': append, an existing file is opened for reading and</span> | 
|  | <span class="sd">              writing, and if the file does not exist it is created.</span> | 
|  | <span class="sd">            - 'r+': similar to 'a', but the file must already exist.</span> | 
|  |  | 
|  | <span class="sd">        complevel : {0-9}, default None</span> | 
|  | <span class="sd">            Specifies a compression level for data.</span> | 
|  | <span class="sd">            A value of 0 or None disables compression.</span> | 
|  | <span class="sd">        complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'</span> | 
|  | <span class="sd">            Specifies the compression library to be used.</span> | 
|  | <span class="sd">            These additional compressors for Blosc are supported</span> | 
|  | <span class="sd">            (default if no compressor specified: 'blosc:blosclz'):</span> | 
|  | <span class="sd">            {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',</span> | 
|  | <span class="sd">            'blosc:zlib', 'blosc:zstd'}.</span> | 
|  | <span class="sd">            Specifying a compression library which is not available issues</span> | 
|  | <span class="sd">            a ValueError.</span> | 
|  | <span class="sd">        append : bool, default False</span> | 
|  | <span class="sd">            For Table formats, append the input data to the existing.</span> | 
|  | <span class="sd">        format : {'fixed', 'table', None}, default 'fixed'</span> | 
|  | <span class="sd">            Possible values:</span> | 
|  |  | 
|  | <span class="sd">            - 'fixed': Fixed format. Fast writing/reading. Not-appendable,</span> | 
|  | <span class="sd">              nor searchable.</span> | 
|  | <span class="sd">            - 'table': Table format. Write as a PyTables Table structure</span> | 
|  | <span class="sd">              which may perform worse but allow more flexible operations</span> | 
|  | <span class="sd">              like searching / selecting subsets of the data.</span> | 
|  | <span class="sd">            - If None, pd.get_option('io.hdf.default_format') is checked,</span> | 
|  | <span class="sd">              followed by fallback to "fixed".</span> | 
|  |  | 
|  | <span class="sd">        index : bool, default True</span> | 
|  | <span class="sd">            Write DataFrame index as a column.</span> | 
|  | <span class="sd">        min_itemsize : dict or int, optional</span> | 
|  | <span class="sd">            Map column names to minimum string sizes for columns.</span> | 
|  | <span class="sd">        nan_rep : Any, optional</span> | 
|  | <span class="sd">            How to represent null values as str.</span> | 
|  | <span class="sd">            Not allowed with append=True.</span> | 
|  | <span class="sd">        dropna : bool, default False, optional</span> | 
|  | <span class="sd">            Remove missing values.</span> | 
|  | <span class="sd">        data_columns : list of columns or True, optional</span> | 
|  | <span class="sd">            List of columns to create as indexed data columns for on-disk</span> | 
|  | <span class="sd">            queries, or True to use all columns. By default only the axes</span> | 
|  | <span class="sd">            of the object are indexed. Applicable only to format='table'.</span> | 
|  | <span class="sd">        errors : str, default 'strict'</span> | 
|  | <span class="sd">            Specifies how encoding and decoding errors are to be handled.</span> | 
|  | <span class="sd">            See the errors argument for :func:`open` for a full list</span> | 
|  | <span class="sd">            of options.</span> | 
|  | <span class="sd">        encoding : str, default "UTF-8"</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.to_orc : Write a DataFrame to the binary orc format.</span> | 
|  | <span class="sd">        DataFrame.to_parquet : Write a DataFrame to the binary parquet format.</span> | 
|  | <span class="sd">        DataFrame.to_csv : Write out to a csv file.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},</span> | 
|  | <span class="sd">        ...                   index=['a', 'b', 'c'])  # doctest: +SKIP</span> | 
|  | <span class="sd">        >>> df.to_hdf('data.h5', key='df', mode='w')  # doctest: +SKIP</span> | 
|  |  | 
|  | <span class="sd">        We can add another object to the same file:</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([1, 2, 3, 4])  # doctest: +SKIP</span> | 
|  | <span class="sd">        >>> s.to_hdf('data.h5', key='s')  # doctest: +SKIP</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">log_advice</span><span class="p">(</span> | 
|  | <span class="s2">"`to_hdf` loads all data into the driver's memory. "</span> | 
|  | <span class="s2">"It should only be used if the resulting DataFrame is expected to be small."</span> | 
|  | <span class="p">)</span> | 
|  | <span class="c1"># Make sure locals() call is at the top of the function so we don't capture local variables.</span> | 
|  | <span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span> | 
|  | <span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">f</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">to_hdf</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">f</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_hdf</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Constructor expects DataFrame or Series; however, "</span> <span class="s2">"got [</span><span class="si">%s</span><span class="s2">]"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="p">,)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span> | 
|  | <span class="n">psdf</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_hdf</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">args</span> | 
|  | <span class="p">)</span> | 
|  |  | 
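|  | # A hypothetical round trip for ``to_hdf``, assuming the optional PyTables package | 
|  | # is installed; the file path and key below are illustrative, not from this module. | 
|  | import pandas as pd | 
|  | import pyspark.pandas as ps | 
|  |  | 
|  | ps.DataFrame({"A": [1, 2, 3]}).to_hdf("example_data.h5", key="df", mode="w") | 
|  | restored = pd.read_hdf("example_data.h5", key="df")  # read back as plain pandas | 
|  |  | 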
|  | <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the mean of the values.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        mean: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.mean()</span> | 
|  | <span class="sd">        a    2.0</span> | 
|  | <span class="sd">        b    0.2</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.mean(axis=1)</span> | 
|  | <span class="sd">        0    0.55</span> | 
|  | <span class="sd">        1    1.10</span> | 
|  | <span class="sd">        2    1.65</span> | 
|  | <span class="sd">        3     NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].mean()</span> | 
|  | <span class="sd">        2.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">spark_column</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">mean</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"mean"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">min_count</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the sum of the values.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Added *skipna* to exclude.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  | <span class="sd">        min_count: int, default 0</span> | 
|  | <span class="sd">            The required number of valid values to perform the operation. If fewer than</span> | 
|  | <span class="sd">             ``min_count`` non-NA values are present the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        sum: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, np.nan, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.sum()</span> | 
|  | <span class="sd">        a    6.0</span> | 
|  | <span class="sd">        b    0.4</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.sum(axis=1)</span> | 
|  | <span class="sd">        0    1.1</span> | 
|  | <span class="sd">        1    2.0</span> | 
|  | <span class="sd">        2    3.3</span> | 
|  | <span class="sd">        3    0.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.sum(min_count=3)</span> | 
|  | <span class="sd">        a    6.0</span> | 
|  | <span class="sd">        b    NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.sum(axis=1, min_count=1)</span> | 
|  | <span class="sd">        0    1.1</span> | 
|  | <span class="sd">        1    2.0</span> | 
|  | <span class="sd">        2    3.3</span> | 
|  | <span class="sd">        3    NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].sum()</span> | 
|  | <span class="sd">        6.0</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].sum(min_count=3)</span> | 
|  | <span class="sd">        6.0</span> | 
|  | <span class="sd">        >>> df['b'].sum(min_count=3)</span> | 
|  | <span class="sd">        nan</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"The behavior of DataFrame.sum with axis=None is deprecated, "</span> | 
|  | <span class="s2">"in a future version this will reduce over both axes and return a scalar. "</span> | 
|  | <span class="s2">"To retain the old behavior, pass axis=0 (or do not pass axis)"</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
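|  | # Column-wise (axis=0) reductions default to numeric columns only; for row-wise | 
|  | # (axis=1) reductions an explicit ``numeric_only=True`` is treated as ``None``. | 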
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  | <span class="k">elif</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">True</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">spark_column</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="nb">sum</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"sum"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">min_count</span><span class="o">=</span><span class="n">min_count</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">product</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">min_count</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the product of the values.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: unlike pandas', pandas-on-Spark's emulates product by ``exp(sum(log(...)))``</span> | 
|  | <span class="sd">            trick. Therefore, it only works for positive numbers.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  | <span class="sd">        min_count: int, default 0</span> | 
|  | <span class="sd">            The required number of valid values to perform the operation. If fewer than</span> | 
|  | <span class="sd">            ``min_count`` non-NA values are present the result will be NA.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        Non-numeric type column is not included to the result.</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({'A': [1, 2, 3, 4, 5],</span> | 
|  | <span class="sd">        ...                     'B': [10, 20, 30, 40, 50],</span> | 
|  | <span class="sd">        ...                     'C': ['a', 'b', 'c', 'd', 'e']})</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">           A   B  C</span> | 
|  | <span class="sd">        0  1  10  a</span> | 
|  | <span class="sd">        1  2  20  b</span> | 
|  | <span class="sd">        2  3  30  c</span> | 
|  | <span class="sd">        3  4  40  d</span> | 
|  | <span class="sd">        4  5  50  e</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.prod()</span> | 
|  | <span class="sd">        A         120</span> | 
|  | <span class="sd">        B    12000000</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        If there is no numeric type columns, returns empty Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.DataFrame({"key": ['a', 'b', 'c'], "val": ['x', 'y', 'z']}).prod()  # doctest: +SKIP</span> | 
|  | <span class="sd">        Series([], dtype: float64)</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([1, 2, 3, 4, 5]).prod()</span> | 
|  | <span class="sd">        120</span> | 
|  |  | 
|  | <span class="sd">        By default, the product of an empty or all-NA Series is ``1``</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([]).prod()  # doctest: +SKIP</span> | 
|  | <span class="sd">        1.0</span> | 
|  |  | 
|  | <span class="sd">        This can be controlled with the ``min_count`` parameter</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([]).prod(min_count=1)  # doctest: +SKIP</span> | 
|  | <span class="sd">        nan</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"The behavior of DataFrame.product with axis=None is deprecated, "</span> | 
|  | <span class="s2">"in a future version this will reduce over both axes and return a scalar. "</span> | 
|  | <span class="s2">"To retain the old behavior, pass axis=0 (or do not pass axis)"</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  | <span class="k">elif</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">True</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">prod</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
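|  | # ``SF.product`` applies the exp(sum(log(...))) emulation described in the note | 
|  | # above, which is why only positive values are supported. | 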
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">spark_column</span><span class="p">,</span> <span class="n">skipna</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">prod</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"prod"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">min_count</span><span class="o">=</span><span class="n">min_count</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">prod</span> <span class="o">=</span> <span class="n">product</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">skew</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return unbiased skew normalized by N-1.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        skew: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.skew()</span> | 
|  | <span class="sd">        a    0.0</span> | 
|  | <span class="sd">        b    0.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].skew()</span> | 
|  | <span class="sd">        0.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">skew</span><span class="p">(</span><span class="n">spark_column</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">skew</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"skew"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
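|  |  | 
|  | <span class="c1"># [Editor's sketch, not part of the module] Minimal usage of the skew reduction above,</span> | 
|  | <span class="c1"># assuming a running SparkSession. Boolean columns are cast to longs before reducing,</span> | 
|  | <span class="c1"># and the default numeric_only=True on axis 0 keeps non-numeric columns out of the</span> | 
|  | <span class="c1"># closure (which would otherwise raise TypeError):</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="c1">#   >>> import numpy as np</span> | 
|  | <span class="c1">#   >>> import pyspark.pandas as ps</span> | 
|  | <span class="c1">#   >>> psdf = ps.DataFrame({'x': [1.0, 2.0, 3.0], 'flag': [True, False, True]})</span> | 
|  | <span class="c1">#   >>> psdf.skew()          # Series: one skewness value per numeric/boolean column</span> | 
|  | <span class="c1">#   >>> psdf['x'].skew()     # scalar for a single Series</span> | 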
|  |  | 
|  | <span class="k">def</span> <span class="nf">kurtosis</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return unbiased kurtosis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0).</span> | 
|  | <span class="sd">        Normalized by N-1.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        kurt: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan, 6], 'b': [0.1, 0.2, 0.3, np.nan, 0.8]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.kurtosis()</span> | 
|  | <span class="sd">        a    1.500000</span> | 
|  | <span class="sd">        b    2.703924</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].kurtosis()</span> | 
|  | <span class="sd">        1.5</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">kurtosis</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">kurt</span><span class="p">(</span><span class="n">spark_column</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">kurtosis</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"kurtosis"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">kurt</span> <span class="o">=</span> <span class="n">kurtosis</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">min</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the minimum of the values.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            If True, include only float, int, boolean columns. This parameter is mainly for</span> | 
|  | <span class="sd">            pandas compatibility. False is supported; however, the columns should</span> | 
|  | <span class="sd">            be all numeric or all non-numeric.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        min: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.min()</span> | 
|  | <span class="sd">        a    1.0</span> | 
|  | <span class="sd">        b    0.1</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.min(axis=1)</span> | 
|  | <span class="sd">        0    0.1</span> | 
|  | <span class="sd">        1    0.2</span> | 
|  | <span class="sd">        2    0.3</span> | 
|  | <span class="sd">        3    NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].min()</span> | 
|  | <span class="sd">        1.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  | <span class="k">elif</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">True</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">),</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"min"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">max</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the maximum of the values.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            If True, include only float, int, boolean columns. This parameter is mainly for</span> | 
|  | <span class="sd">            pandas compatibility. False is supported; however, the columns should</span> | 
|  | <span class="sd">            be all numeric or all non-numeric.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        max: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.max()</span> | 
|  | <span class="sd">        a    3.0</span> | 
|  | <span class="sd">        b    0.3</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.max(axis=1)</span> | 
|  | <span class="sd">        0    1.0</span> | 
|  | <span class="sd">        1    2.0</span> | 
|  | <span class="sd">        2    3.0</span> | 
|  | <span class="sd">        3    NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].max()</span> | 
|  | <span class="sd">        3.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  | <span class="k">elif</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">True</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">),</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"max"</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">count</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Count non-NA cells for each column.</span> | 
|  |  | 
|  | <span class="sd">        The values `None`, `NaN` are considered NA.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {0 or ‘index’, 1 or ‘columns’}, default 0</span> | 
|  | <span class="sd">            If 0 or ‘index’ counts are generated for each column. If 1 or ‘columns’ counts are</span> | 
|  | <span class="sd">            generated for each row.</span> | 
|  | <span class="sd">        numeric_only: bool, default False</span> | 
|  | <span class="sd">            If True, include only float, int, boolean columns. This parameter is mainly for</span> | 
|  | <span class="sd">            pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        max: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.shape: Number of DataFrame rows and columns (including NA</span> | 
|  | <span class="sd">            elements).</span> | 
|  | <span class="sd">        DataFrame.isna: Boolean same-sized DataFrame showing places of NA</span> | 
|  | <span class="sd">            elements.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        Constructing DataFrame from a dictionary:</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({"Person":</span> | 
|  | <span class="sd">        ...                    ["John", "Myla", "Lewis", "John", "Myla"],</span> | 
|  | <span class="sd">        ...                    "Age": [24., np.nan, 21., 33, 26],</span> | 
|  | <span class="sd">        ...                    "Single": [False, True, True, True, False]},</span> | 
|  | <span class="sd">        ...                   columns=["Person", "Age", "Single"])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">          Person   Age  Single</span> | 
|  | <span class="sd">        0   John  24.0   False</span> | 
|  | <span class="sd">        1   Myla   NaN    True</span> | 
|  | <span class="sd">        2  Lewis  21.0    True</span> | 
|  | <span class="sd">        3   John  33.0    True</span> | 
|  | <span class="sd">        4   Myla  26.0   False</span> | 
|  |  | 
|  | <span class="sd">        Notice the uncounted NA values:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.count()</span> | 
|  | <span class="sd">        Person    5</span> | 
|  | <span class="sd">        Age       4</span> | 
|  | <span class="sd">        Single    5</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.count(axis=1)</span> | 
|  | <span class="sd">        0    3</span> | 
|  | <span class="sd">        1    2</span> | 
|  | <span class="sd">        2    3</span> | 
|  | <span class="sd">        3    3</span> | 
|  | <span class="sd">        4    3</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['Person'].count()</span> | 
|  | <span class="sd">        5</span> | 
|  |  | 
|  | <span class="sd">        >>> df['Age'].count()</span> | 
|  | <span class="sd">        4</span> | 
|  | <span class="sd">        """</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">Frame</span><span class="o">.</span><span class="n">_count_expr</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">std</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">ddof</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return sample standard deviation.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 3.3.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        ddof: int, default 1</span> | 
|  | <span class="sd">            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,</span> | 
|  | <span class="sd">            where N represents the number of elements.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including arbitary integers.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        std: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.std()</span> | 
|  | <span class="sd">        a    1.0</span> | 
|  | <span class="sd">        b    0.1</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.std(ddof=2)</span> | 
|  | <span class="sd">        a    1.414214</span> | 
|  | <span class="sd">        b    0.141421</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.std(axis=1)</span> | 
|  | <span class="sd">        0    0.636396</span> | 
|  | <span class="sd">        1    1.272792</span> | 
|  | <span class="sd">        2    1.909188</span> | 
|  | <span class="sd">        3         NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.std(ddof=0)</span> | 
|  | <span class="sd">        a    0.816497</span> | 
|  | <span class="sd">        b    0.081650</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].std()</span> | 
|  | <span class="sd">        1.0</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].std(ddof=0)</span> | 
|  | <span class="sd">        0.816496580927726</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].std(ddof=-1)</span> | 
|  | <span class="sd">        0.707106...</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ddof</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"ddof must be integer"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"The behavior of DataFrame.std with axis=None is deprecated, "</span> | 
|  | <span class="s2">"in a future version this will reduce over both axes and return a scalar. "</span> | 
|  | <span class="s2">"To retain the old behavior, pass axis=0 (or do not pass axis)"</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">stddev</span><span class="p">(</span><span class="n">spark_column</span><span class="p">,</span> <span class="n">ddof</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">std</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"std"</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> <span class="n">ddof</span><span class="o">=</span><span class="n">ddof</span><span class="p">,</span> <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">var</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">ddof</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return unbiased variance.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 3.3.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        ddof: int, default 1</span> | 
|  | <span class="sd">            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,</span> | 
|  | <span class="sd">            where N represents the number of elements.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including arbitary integers.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        var: scalar for a Series, and a Series for a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'a': [1, 2, 3, np.nan], 'b': [0.1, 0.2, 0.3, np.nan]},</span> | 
|  | <span class="sd">        ...                   columns=['a', 'b'])</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.var()</span> | 
|  | <span class="sd">        a    1.00</span> | 
|  | <span class="sd">        b    0.01</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.var(ddof=2)</span> | 
|  | <span class="sd">        a    2.00</span> | 
|  | <span class="sd">        b    0.02</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.var(axis=1)</span> | 
|  | <span class="sd">        0    0.405</span> | 
|  | <span class="sd">        1    1.620</span> | 
|  | <span class="sd">        2    3.645</span> | 
|  | <span class="sd">        3      NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.var(ddof=0)</span> | 
|  | <span class="sd">        a    0.666667</span> | 
|  | <span class="sd">        b    0.006667</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].var()</span> | 
|  | <span class="sd">        1.0</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].var(ddof=0)</span> | 
|  | <span class="sd">        0.6666666666666666</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].var(ddof=-2)</span> | 
|  | <span class="sd">        0.4</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ddof</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"ddof must be integer"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"The behavior of DataFrame.var with axis=None is deprecated, "</span> | 
|  | <span class="s2">"in a future version this will reduce over both axes and return a scalar. "</span> | 
|  | <span class="s2">"To retain the old behavior, pass axis=0 (or do not pass axis)"</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">var</span><span class="p">(</span><span class="n">spark_column</span><span class="p">,</span> <span class="n">ddof</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">var</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"var"</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> <span class="n">ddof</span><span class="o">=</span><span class="n">ddof</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">median</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the median of the values for the requested axis.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: Unlike pandas', the median in pandas-on-Spark is an approximated median based upon</span> | 
|  | <span class="sd">            approximate percentile computation because computing median across a large dataset</span> | 
|  | <span class="sd">            is extremely expensive.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  | <span class="sd">        accuracy: int, optional</span> | 
|  | <span class="sd">            Default accuracy of approximation. Larger value means better accuracy.</span> | 
|  | <span class="sd">            The relative error can be deduced by 1.0 / accuracy.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        median: scalar or Series</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame({</span> | 
|  | <span class="sd">        ...     'a': [24., 21., 25., 33., 26.], 'b': [1, 2, 3, 4, 5]}, columns=['a', 'b'])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">              a  b</span> | 
|  | <span class="sd">        0  24.0  1</span> | 
|  | <span class="sd">        1  21.0  2</span> | 
|  | <span class="sd">        2  25.0  3</span> | 
|  | <span class="sd">        3  33.0  4</span> | 
|  | <span class="sd">        4  26.0  5</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.median()</span> | 
|  | <span class="sd">        a    25.0</span> | 
|  | <span class="sd">        b     3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df['a'].median()</span> | 
|  | <span class="sd">        25.0</span> | 
|  | <span class="sd">        >>> (df['b'] + 100).median()</span> | 
|  | <span class="sd">        103.0</span> | 
|  |  | 
|  | <span class="sd">        For multi-index columns,</span> | 
|  |  | 
|  | <span class="sd">        >>> df.columns = pd.MultiIndex.from_tuples([('x', 'a'), ('y', 'b')])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">              x  y</span> | 
|  | <span class="sd">              a  b</span> | 
|  | <span class="sd">        0  24.0  1</span> | 
|  | <span class="sd">        1  21.0  2</span> | 
|  | <span class="sd">        2  25.0  3</span> | 
|  | <span class="sd">        3  33.0  4</span> | 
|  | <span class="sd">        4  26.0  5</span> | 
|  |  | 
|  | <span class="sd">        On a DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df.median()</span> | 
|  | <span class="sd">        x  a    25.0</span> | 
|  | <span class="sd">        y  b     3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.median(axis=1)</span> | 
|  | <span class="sd">        0    12.5</span> | 
|  | <span class="sd">        1    11.5</span> | 
|  | <span class="sd">        2    14.0</span> | 
|  | <span class="sd">        3    18.5</span> | 
|  | <span class="sd">        4    15.5</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        On a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df[('x', 'a')].median()</span> | 
|  | <span class="sd">        25.0</span> | 
|  | <span class="sd">        >>> (df[('y', 'b')] + 100).median()</span> | 
|  | <span class="sd">        103.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">accuracy</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"accuracy must be an integer; however, got [</span><span class="si">%s</span><span class="s2">]"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">accuracy</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">median</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="p">(</span><span class="n">BooleanType</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">)):</span> | 
|  | <span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">percentile_approx</span><span class="p">(</span><span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()),</span> <span class="mf">0.5</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">median</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"median"</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">sem</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">ddof</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return unbiased standard error of the mean over requested axis.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 3.3.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {index (0), columns (1)}</span> | 
|  | <span class="sd">            Axis for the function to be applied on.</span> | 
|  | <span class="sd">        skipna: bool, default True</span> | 
|  | <span class="sd">            Exclude NA/null values when computing the result.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including NA/null values.</span> | 
|  | <span class="sd">        ddof: int, default 1</span> | 
|  | <span class="sd">            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,</span> | 
|  | <span class="sd">            where N represents the number of elements.</span> | 
|  |  | 
|  | <span class="sd">            .. versionchanged:: 3.4.0</span> | 
|  | <span class="sd">               Supported including arbitary integers.</span> | 
|  | <span class="sd">        numeric_only: bool, default None</span> | 
|  | <span class="sd">            Include only float, int, boolean columns. False is not supported. This parameter</span> | 
|  | <span class="sd">            is mainly for pandas compatibility.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        scalar(for Series) or Series(for DataFrame)</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">           a  b</span> | 
|  | <span class="sd">        0  1  4</span> | 
|  | <span class="sd">        1  2  5</span> | 
|  | <span class="sd">        2  3  6</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.sem()</span> | 
|  | <span class="sd">        a    0.57735</span> | 
|  | <span class="sd">        b    0.57735</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.sem(ddof=0)</span> | 
|  | <span class="sd">        a    0.471405</span> | 
|  | <span class="sd">        b    0.471405</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.sem(ddof=2)</span> | 
|  | <span class="sd">        a    0.816497</span> | 
|  | <span class="sd">        b    0.816497</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.sem(axis=1)</span> | 
|  | <span class="sd">        0    1.5</span> | 
|  | <span class="sd">        1    1.5</span> | 
|  | <span class="sd">        2    1.5</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        Support for Series</span> | 
|  |  | 
|  | <span class="sd">        >>> psser = psdf.a</span> | 
|  | <span class="sd">        >>> psser</span> | 
|  | <span class="sd">        0    1</span> | 
|  | <span class="sd">        1    2</span> | 
|  | <span class="sd">        2    3</span> | 
|  | <span class="sd">        Name: a, dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> psser.sem()</span> | 
|  | <span class="sd">        0.5773502691896258</span> | 
|  |  | 
|  | <span class="sd">        >>> psser.sem(ddof=0)</span> | 
|  | <span class="sd">        0.47140452079103173</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ddof</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"ddof must be integer"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"The behavior of DataFrame.sem with axis=None is deprecated, "</span> | 
|  | <span class="s2">"in a future version this will reduce over both axes and return a scalar. "</span> | 
|  | <span class="s2">"To retain the old behavior, pass axis=0 (or do not pass axis)"</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="n">numeric_only</span> <span class="o">=</span> <span class="kc">True</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="n">spark_column</span> <span class="o">=</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span> | 
|  | <span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">SF</span><span class="o">.</span><span class="n">stddev</span><span class="p">(</span><span class="n">spark_column</span><span class="p">,</span> <span class="n">ddof</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">sem</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">std</span><span class="p">(</span><span class="n">psser</span><span class="p">)</span> <span class="o">/</span> <span class="n">F</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">Frame</span><span class="o">.</span><span class="n">_count_expr</span><span class="p">(</span><span class="n">psser</span><span class="p">))</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span> | 
|  | <span class="n">sem</span><span class="p">,</span> | 
|  | <span class="n">name</span><span class="o">=</span><span class="s2">"sem"</span><span class="p">,</span> | 
|  | <span class="n">numeric_only</span><span class="o">=</span><span class="n">numeric_only</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> | 
|  | <span class="n">ddof</span><span class="o">=</span><span class="n">ddof</span><span class="p">,</span> | 
|  | <span class="n">skipna</span><span class="o">=</span><span class="n">skipna</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return an int representing the number of elements in this object.</span> | 
|  |  | 
|  | <span class="sd">        Return the number of rows if Series. Otherwise return the number of</span> | 
|  | <span class="sd">        rows times number of columns if DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> s = ps.Series({'a': 1, 'b': 2, 'c': None})</span> | 
|  | <span class="sd">        >>> s.size</span> | 
|  | <span class="sd">        3</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({'col1': [1, 2, None], 'col2': [3, 4, None]})</span> | 
|  | <span class="sd">        >>> df.size</span> | 
|  | <span class="sd">        6</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame(index=[1, 2, None])</span> | 
|  | <span class="sd">        >>> df.size</span> | 
|  | <span class="sd">        0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">num_columns</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">num_columns</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="mi">0</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">*</span> <span class="n">num_columns</span>  <span class="c1"># type: ignore[arg-type]</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">abs</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return a Series/DataFrame with absolute numeric value of each element.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        abs: Series/DataFrame containing the absolute value of each element.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        Absolute numeric values in a Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([-1.10, 2, -3.33, 4])</span> | 
|  | <span class="sd">        >>> s.abs()</span> | 
|  | <span class="sd">        0    1.10</span> | 
|  | <span class="sd">        1    2.00</span> | 
|  | <span class="sd">        2    3.33</span> | 
|  | <span class="sd">        3    4.00</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        Absolute numeric values in a DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame({</span> | 
|  | <span class="sd">        ...     'a': [4, 5, 6, 7],</span> | 
|  | <span class="sd">        ...     'b': [10, 20, 30, 40],</span> | 
|  | <span class="sd">        ...     'c': [100, 50, -30, -50]</span> | 
|  | <span class="sd">        ...   },</span> | 
|  | <span class="sd">        ...   columns=['a', 'b', 'c'])</span> | 
|  | <span class="sd">        >>> df.abs()</span> | 
|  | <span class="sd">           a   b    c</span> | 
|  | <span class="sd">        0  4  10  100</span> | 
|  | <span class="sd">        1  5  20   50</span> | 
|  | <span class="sd">        2  6  30   30</span> | 
|  | <span class="sd">        3  7  40   50</span> | 
|  | <span class="sd">        """</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">abs</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="s2">"Series"</span><span class="p">,</span> <span class="n">Column</span><span class="p">]:</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span> | 
|  | <span class="k">return</span> <span class="n">psser</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span> | 
|  | <span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span> | 
|  | <span class="n">F</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">),</span> <span class="n">field</span><span class="o">=</span><span class="n">psser</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> | 
|  | <span class="s2">"bad operand type for abs(): </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">)"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | 
|  | <span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">),</span> | 
|  | <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">(),</span> | 
|  | <span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span><span class="nb">abs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: by argument only support the grouping name and as_index only for now. Documentation</span> | 
|  | <span class="c1"># should be updated when it's supported.</span> | 
|  | <span class="k">def</span> <span class="nf">groupby</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">by</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]]],</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"GroupBy[FrameLike]"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Group DataFrame or Series using one or more columns.</span> | 
|  |  | 
|  | <span class="sd">        A groupby operation involves some combination of splitting the</span> | 
|  | <span class="sd">        object, applying a function, and combining the results. This can be</span> | 
|  | <span class="sd">        used to group large amounts of data and compute operations on these</span> | 
|  | <span class="sd">        groups.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        by: Series, label, or list of labels</span> | 
|  | <span class="sd">            Used to determine the groups for the groupby.</span> | 
|  | <span class="sd">            If Series is passed, the Series or dict VALUES</span> | 
|  | <span class="sd">            will be used to determine the groups. A label or list of</span> | 
|  | <span class="sd">            labels may be passed to group by the columns in ``self``.</span> | 
|  | <span class="sd">        axis: int, default 0 or 'index'</span> | 
|  | <span class="sd">            Can only be set to 0 now.</span> | 
|  | <span class="sd">        as_index: bool, default True</span> | 
|  | <span class="sd">            For aggregated output, return object with group labels as the</span> | 
|  | <span class="sd">            index. Only relevant for DataFrame input. as_index=False is</span> | 
|  | <span class="sd">            effectively "SQL-style" grouped output.</span> | 
|  | <span class="sd">        dropna: bool, default True</span> | 
|  | <span class="sd">            If True, and if group keys contain NA values,</span> | 
|  | <span class="sd">            NA values together with row/column will be dropped.</span> | 
|  | <span class="sd">            If False, NA values will also be treated as the key in groups.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrameGroupBy or SeriesGroupBy</span> | 
|  | <span class="sd">            Depends on the calling object and returns groupby object that</span> | 
|  | <span class="sd">            contains information about the groups.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        pyspark.pandas.groupby.GroupBy</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame({'Animal': ['Falcon', 'Falcon',</span> | 
|  | <span class="sd">        ...                               'Parrot', 'Parrot'],</span> | 
|  | <span class="sd">        ...                    'Max Speed': [380., 370., 24., 26.]},</span> | 
|  | <span class="sd">        ...                   columns=['Animal', 'Max Speed'])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">           Animal  Max Speed</span> | 
|  | <span class="sd">        0  Falcon      380.0</span> | 
|  | <span class="sd">        1  Falcon      370.0</span> | 
|  | <span class="sd">        2  Parrot       24.0</span> | 
|  | <span class="sd">        3  Parrot       26.0</span> | 
|  |  | 
|  | <span class="sd">        >>> df.groupby(['Animal']).mean().sort_index()  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">                Max Speed</span> | 
|  | <span class="sd">        Animal</span> | 
|  | <span class="sd">        Falcon      375.0</span> | 
|  | <span class="sd">        Parrot       25.0</span> | 
|  |  | 
|  | <span class="sd">        >>> df.groupby(['Animal'], as_index=False).mean().sort_values('Animal')</span> | 
|  | <span class="sd">        ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">           Animal  Max Speed</span> | 
|  | <span class="sd">        ...Falcon      375.0</span> | 
|  | <span class="sd">        ...Parrot       25.0</span> | 
|  |  | 
|  | <span class="sd">        We can also choose to include NA in group keys or not by setting dropna parameter,</span> | 
|  | <span class="sd">        the default setting is True:</span> | 
|  |  | 
|  | <span class="sd">        >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame(l, columns=["a", "b", "c"])</span> | 
|  | <span class="sd">        >>> df.groupby(by=["b"]).sum().sort_index()  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">             a  c</span> | 
|  | <span class="sd">        b</span> | 
|  | <span class="sd">        1.0  2  3</span> | 
|  | <span class="sd">        2.0  2  5</span> | 
|  |  | 
|  | <span class="sd">        >>> df.groupby(by=["b"], dropna=False).sum().sort_index()  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">             a  c</span> | 
|  | <span class="sd">        b</span> | 
|  | <span class="sd">        1.0  2  3</span> | 
|  | <span class="sd">        2.0  2  5</span> | 
|  | <span class="sd">        NaN  1  4</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">new_by</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Label</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">]]</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">by</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Grouper for '</span><span class="si">{}</span><span class="s2">' not 1-dimensional"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">by</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">by</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">new_by</span> <span class="o">=</span> <span class="p">[</span><span class="n">by</span><span class="p">]</span> | 
|  | <span class="k">elif</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">by</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">by</span><span class="p">)</span> | 
|  | <span class="n">new_by</span> <span class="o">=</span> <span class="p">[</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="n">by</span><span class="p">)]</span> | 
|  | <span class="k">elif</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">by</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">by</span><span class="p">)</span> | 
|  | <span class="n">new_by</span> <span class="o">=</span> <span class="p">[</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="p">(</span><span class="n">by</span><span class="p">,))]</span> | 
|  | <span class="k">elif</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">by</span><span class="p">):</span> | 
|  | <span class="n">new_by</span> <span class="o">=</span> <span class="p">[]</span> | 
|  | <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">by</span><span class="p">:</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> | 
|  | <span class="s2">"Grouper for '</span><span class="si">{}</span><span class="s2">' not 1-dimensional"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">key</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">new_by</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> | 
|  | <span class="k">elif</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">key</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> | 
|  | <span class="n">new_by</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="n">key</span><span class="p">))</span> | 
|  | <span class="k">elif</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">key</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> | 
|  | <span class="n">new_by</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="p">(</span><span class="n">key</span><span class="p">,)))</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> | 
|  | <span class="s2">"Grouper for '</span><span class="si">{}</span><span class="s2">' not 1-dimensional"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">key</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Grouper for '</span><span class="si">{}</span><span class="s2">' not 1-dimensional"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">by</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">new_by</span><span class="p">):</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"No group keys passed!"</span><span class="p">)</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'axis should be either 0 or "index" currently.'</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_build_groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="n">new_by</span><span class="p">,</span> <span class="n">as_index</span><span class="o">=</span><span class="n">as_index</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_build_groupby</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">by</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">"Series"</span><span class="p">,</span> <span class="n">Label</span><span class="p">]],</span> <span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"GroupBy[FrameLike]"</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">bool</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the bool of a single element in the current object.</span> | 
|  |  | 
|  | <span class="sd">        This must be a boolean scalar value, either True or False. Raise a ValueError if</span> | 
|  | <span class="sd">        the object does not have exactly 1 element, or that element is not boolean</span> | 
|  |  | 
|  | <span class="sd">        .. deprecated:: 4.0.0</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        bool</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> ps.DataFrame({'a': [True]}).bool()</span> | 
|  | <span class="sd">        True</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([False]).bool()</span> | 
|  | <span class="sd">        False</span> | 
|  |  | 
|  | <span class="sd">        If there are non-boolean or multiple values exist, it raises an exception in all</span> | 
|  | <span class="sd">        cases as below.</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.DataFrame({'a': ['a']}).bool()</span> | 
|  | <span class="sd">        Traceback (most recent call last):</span> | 
|  | <span class="sd">          ...</span> | 
|  | <span class="sd">        ValueError: bool cannot act on a non-boolean single element DataFrame</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.DataFrame({'a': [True], 'b': [False]}).bool()  # doctest: +NORMALIZE_WHITESPACE</span> | 
|  | <span class="sd">        Traceback (most recent call last):</span> | 
|  | <span class="sd">          ...</span> | 
|  | <span class="sd">        ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(),</span> | 
|  | <span class="sd">        a.item(), a.any() or a.all().</span> | 
|  |  | 
|  | <span class="sd">        >>> ps.Series([1]).bool()</span> | 
|  | <span class="sd">        Traceback (most recent call last):</span> | 
|  | <span class="sd">          ...</span> | 
|  | <span class="sd">        ValueError: bool cannot act on a non-boolean single element DataFrame</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2">.bool is now deprecated "</span> | 
|  | <span class="s2">"and will be removed in future version."</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="n">df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_dataframe</span><span class="p">()</span> | 
|  | <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">first_valid_index</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="o">...</span><span class="p">]]]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Retrieves the index of the first valid value.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        scalar, tuple, or None</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        Support for DataFrame</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({'a': [None, 2, 3, 2],</span> | 
|  | <span class="sd">        ...                     'b': [None, 2.0, 3.0, 1.0],</span> | 
|  | <span class="sd">        ...                     'c': [None, 200, 400, 200]},</span> | 
|  | <span class="sd">        ...                     index=['Q', 'W', 'E', 'R'])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             a    b      c</span> | 
|  | <span class="sd">        Q  NaN  NaN    NaN</span> | 
|  | <span class="sd">        W  2.0  2.0  200.0</span> | 
|  | <span class="sd">        E  3.0  3.0  400.0</span> | 
|  | <span class="sd">        R  2.0  1.0  200.0</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.first_valid_index()</span> | 
|  | <span class="sd">        'W'</span> | 
|  |  | 
|  | <span class="sd">        Support for MultiIndex columns</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.columns = pd.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             a    b      c</span> | 
|  | <span class="sd">             x    y      z</span> | 
|  | <span class="sd">        Q  NaN  NaN    NaN</span> | 
|  | <span class="sd">        W  2.0  2.0  200.0</span> | 
|  | <span class="sd">        E  3.0  3.0  400.0</span> | 
|  | <span class="sd">        R  2.0  1.0  200.0</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.first_valid_index()</span> | 
|  | <span class="sd">        'W'</span> | 
|  |  | 
|  | <span class="sd">        Support for Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([None, None, 3, 4, 5], index=[100, 200, 300, 400, 500])</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        100    NaN</span> | 
|  | <span class="sd">        200    NaN</span> | 
|  | <span class="sd">        300    3.0</span> | 
|  | <span class="sd">        400    4.0</span> | 
|  | <span class="sd">        500    5.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.first_valid_index()</span> | 
|  | <span class="sd">        300</span> | 
|  |  | 
|  | <span class="sd">        Support for MultiIndex</span> | 
|  |  | 
|  | <span class="sd">        >>> midx = pd.MultiIndex([['lama', 'cow', 'falcon'],</span> | 
|  | <span class="sd">        ...                       ['speed', 'weight', 'length']],</span> | 
|  | <span class="sd">        ...                      [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span> | 
|  | <span class="sd">        ...                       [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span> | 
|  | <span class="sd">        >>> s = ps.Series([None, None, None, None, 250, 1.5, 320, 1, 0.3], index=midx)</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        lama    speed       NaN</span> | 
|  | <span class="sd">                weight      NaN</span> | 
|  | <span class="sd">                length      NaN</span> | 
|  | <span class="sd">        cow     speed       NaN</span> | 
|  | <span class="sd">                weight    250.0</span> | 
|  | <span class="sd">                length      1.5</span> | 
|  | <span class="sd">        falcon  speed     320.0</span> | 
|  | <span class="sd">                weight      1.0</span> | 
|  | <span class="sd">                length      0.3</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.first_valid_index()</span> | 
|  | <span class="sd">        ('cow', 'weight')</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">data_spark_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">data_spark_columns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">cond</span> <span class="o">=</span> <span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&</span> <span class="n">y</span><span class="p">,</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">data_spark_columns</span><span class="p">))</span> | 
|  |  | 
|  | <span class="k">with</span> <span class="n">sql_conf</span><span class="p">({</span><span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">:</span> <span class="kc">False</span><span class="p">}):</span> | 
|  | <span class="c1"># Disable Arrow to keep row ordering.</span> | 
|  | <span class="n">first_valid_row</span> <span class="o">=</span> <span class="p">(</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">toPandas</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># For Empty Series or DataFrame, returns None.</span> | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">first_valid_row</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">first_valid_row</span> <span class="o">=</span> <span class="n">first_valid_row</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">first_valid_row</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">first_valid_row</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">first_valid_row</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">last_valid_index</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="o">...</span><span class="p">]]]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return index for last non-NA/null value.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        scalar, tuple, or None</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        This API only works with PySpark >= 3.0.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  |  | 
|  | <span class="sd">        Support for DataFrame</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({'a': [1, 2, 3, None],</span> | 
|  | <span class="sd">        ...                     'b': [1.0, 2.0, 3.0, None],</span> | 
|  | <span class="sd">        ...                     'c': [100, 200, 400, None]},</span> | 
|  | <span class="sd">        ...                     index=['Q', 'W', 'E', 'R'])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             a    b      c</span> | 
|  | <span class="sd">        Q  1.0  1.0  100.0</span> | 
|  | <span class="sd">        W  2.0  2.0  200.0</span> | 
|  | <span class="sd">        E  3.0  3.0  400.0</span> | 
|  | <span class="sd">        R  NaN  NaN    NaN</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.last_valid_index()  # doctest: +SKIP</span> | 
|  | <span class="sd">        'E'</span> | 
|  |  | 
|  | <span class="sd">        Support for MultiIndex columns</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.columns = pd.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             a    b      c</span> | 
|  | <span class="sd">             x    y      z</span> | 
|  | <span class="sd">        Q  1.0  1.0  100.0</span> | 
|  | <span class="sd">        W  2.0  2.0  200.0</span> | 
|  | <span class="sd">        E  3.0  3.0  400.0</span> | 
|  | <span class="sd">        R  NaN  NaN    NaN</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.last_valid_index()  # doctest: +SKIP</span> | 
|  | <span class="sd">        'E'</span> | 
|  |  | 
|  | <span class="sd">        Support for Series.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([1, 2, 3, None, None], index=[100, 200, 300, 400, 500])</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        100    1.0</span> | 
|  | <span class="sd">        200    2.0</span> | 
|  | <span class="sd">        300    3.0</span> | 
|  | <span class="sd">        400    NaN</span> | 
|  | <span class="sd">        500    NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.last_valid_index()  # doctest: +SKIP</span> | 
|  | <span class="sd">        300</span> | 
|  |  | 
|  | <span class="sd">        Support for MultiIndex</span> | 
|  |  | 
|  | <span class="sd">        >>> midx = pd.MultiIndex([['lama', 'cow', 'falcon'],</span> | 
|  | <span class="sd">        ...                       ['speed', 'weight', 'length']],</span> | 
|  | <span class="sd">        ...                      [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span> | 
|  | <span class="sd">        ...                       [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span> | 
|  | <span class="sd">        >>> s = ps.Series([250, 1.5, 320, 1, 0.3, None, None, None, None], index=midx)</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        lama    speed     250.0</span> | 
|  | <span class="sd">                weight      1.5</span> | 
|  | <span class="sd">                length    320.0</span> | 
|  | <span class="sd">        cow     speed       1.0</span> | 
|  | <span class="sd">                weight      0.3</span> | 
|  | <span class="sd">                length      NaN</span> | 
|  | <span class="sd">        falcon  speed       NaN</span> | 
|  | <span class="sd">                weight      NaN</span> | 
|  | <span class="sd">                length      NaN</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.last_valid_index()  # doctest: +SKIP</span> | 
|  | <span class="sd">        ('cow', 'weight')</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">data_spark_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">data_spark_columns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">cond</span> <span class="o">=</span> <span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&</span> <span class="n">y</span><span class="p">,</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">data_spark_columns</span><span class="p">))</span> | 
|  |  | 
|  | <span class="n">last_valid_rows</span> <span class="o">=</span> <span class="p">(</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">tail</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># For Empty Series or DataFrame, returns None.</span> | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">last_valid_rows</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">last_valid_row</span> <span class="o">=</span> <span class="n">last_valid_rows</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">last_valid_row</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">last_valid_row</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">last_valid_row</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: 'center', 'win_type', 'on', 'axis' parameter should be implemented.</span> | 
|  | <span class="k">def</span> <span class="nf">rolling</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Rolling[FrameLike]"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Provide rolling transformations.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: 'min_periods' in pandas-on-Spark works as a fixed window size unlike pandas.</span> | 
|  | <span class="sd">            Unlike pandas, NA is also counted as the period. This might be changed</span> | 
|  | <span class="sd">            soon.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        window: int, or offset</span> | 
|  | <span class="sd">            Size of the moving window.</span> | 
|  | <span class="sd">            This is the number of observations used for calculating the statistic.</span> | 
|  | <span class="sd">            Each window will be a fixed size.</span> | 
|  |  | 
|  | <span class="sd">        min_periods: int, default None</span> | 
|  | <span class="sd">            Minimum number of observations in window required to have a value</span> | 
|  | <span class="sd">            (otherwise result is NA).</span> | 
|  | <span class="sd">            For a window that is specified by an offset, min_periods will default to 1.</span> | 
|  | <span class="sd">            Otherwise, min_periods will default to the size of the window.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        a Window sub-classed for the operation</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.window</span> <span class="kn">import</span> <span class="n">Rolling</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">Rolling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window</span><span class="o">=</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="o">=</span><span class="n">min_periods</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: 'center' and 'axis' parameter should be implemented.</span> | 
|  | <span class="c1">#   'axis' implementation, refer https://github.com/databricks/koalas/pull/607</span> | 
|  | <span class="k">def</span> <span class="nf">expanding</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Expanding[FrameLike]"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Provide expanding transformations.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: 'min_periods' in pandas-on-Spark works as a fixed window size unlike pandas.</span> | 
|  | <span class="sd">            Unlike pandas, NA is also counted as the period. This might be changed</span> | 
|  | <span class="sd">            soon.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        min_periods: int, default 1</span> | 
|  | <span class="sd">            Minimum number of observations in window required to have a value</span> | 
|  | <span class="sd">            (otherwise result is NA).</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        a Window sub-classed for the operation</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.window</span> <span class="kn">import</span> <span class="n">Expanding</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">Expanding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">min_periods</span><span class="o">=</span><span class="n">min_periods</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># TODO: 'adjust', 'axis', 'method' parameter should be implemented.</span> | 
|  | <span class="k">def</span> <span class="nf">ewm</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">ignore_na</span><span class="p">:</span> <span class="n">bool_type</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"ExponentialMoving[FrameLike]"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Provide exponentially weighted window transformations.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: 'min_periods' in pandas-on-Spark works as a fixed window size unlike pandas.</span> | 
|  | <span class="sd">            Unlike pandas, NA is also counted as the period. This might be changed</span> | 
|  | <span class="sd">            soon.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 3.4.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        com: float, optional</span> | 
|  | <span class="sd">            Specify decay in terms of center of mass.</span> | 
|  | <span class="sd">            alpha = 1 / (1 + com), for com >= 0.</span> | 
|  |  | 
|  | <span class="sd">        span: float, optional</span> | 
|  | <span class="sd">            Specify decay in terms of span.</span> | 
|  | <span class="sd">            alpha = 2 / (span + 1), for span >= 1.</span> | 
|  |  | 
|  | <span class="sd">        halflife: float, optional</span> | 
|  | <span class="sd">            Specify decay in terms of half-life.</span> | 
|  | <span class="sd">            alpha = 1 - exp(-ln(2) / halflife), for halflife > 0.</span> | 
|  |  | 
|  | <span class="sd">        alpha: float, optional</span> | 
|  | <span class="sd">            Specify smoothing factor alpha directly.</span> | 
|  | <span class="sd">            0 < alpha <= 1.</span> | 
|  |  | 
|  | <span class="sd">        min_periods: int, default None</span> | 
|  | <span class="sd">            Minimum number of observations in window required to have a value</span> | 
|  | <span class="sd">            (otherwise result is NA).</span> | 
|  |  | 
|  | <span class="sd">        ignore_na: bool, default False</span> | 
|  | <span class="sd">            Ignore missing values when calculating weights.</span> | 
|  |  | 
|  | <span class="sd">            - When ``ignore_na=False`` (default), weights are based on absolute positions.</span> | 
|  | <span class="sd">              For example, the weights of :math:`x_0` and :math:`x_2` used in calculating</span> | 
|  | <span class="sd">              the final weighted average of [:math:`x_0`, None, :math:`x_2`] are</span> | 
|  | <span class="sd">              :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and</span> | 
|  | <span class="sd">              :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``.</span> | 
|  |  | 
|  | <span class="sd">            - When ``ignore_na=True``, weights are based</span> | 
|  | <span class="sd">              on relative positions. For example, the weights of :math:`x_0` and :math:`x_2`</span> | 
|  | <span class="sd">              used in calculating the final weighted average of</span> | 
|  | <span class="sd">              [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if</span> | 
|  | <span class="sd">              ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        a Window sub-classed for the operation</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.window</span> <span class="kn">import</span> <span class="n">ExponentialMoving</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">ExponentialMoving</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">com</span><span class="o">=</span><span class="n">com</span><span class="p">,</span> | 
|  | <span class="n">span</span><span class="o">=</span><span class="n">span</span><span class="p">,</span> | 
|  | <span class="n">halflife</span><span class="o">=</span><span class="n">halflife</span><span class="p">,</span> | 
|  | <span class="n">alpha</span><span class="o">=</span><span class="n">alpha</span><span class="p">,</span> | 
|  | <span class="n">min_periods</span><span class="o">=</span><span class="n">min_periods</span><span class="p">,</span> | 
|  | <span class="n">ignore_na</span><span class="o">=</span><span class="n">ignore_na</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">default</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Any</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Get item from object for given key (DataFrame column, Panel slice,</span> | 
|  | <span class="sd">        etc.). Returns default value if not found.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        key: object</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        value: same type as items contained in object</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame({'x':range(3), 'y':['a','b','b'], 'z':['a','b','b']},</span> | 
|  | <span class="sd">        ...                   columns=['x', 'y', 'z'], index=[10, 20, 20])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">            x  y  z</span> | 
|  | <span class="sd">        10  0  a  a</span> | 
|  | <span class="sd">        20  1  b  b</span> | 
|  | <span class="sd">        20  2  b  b</span> | 
|  |  | 
|  | <span class="sd">        >>> df.get('x')</span> | 
|  | <span class="sd">        10    0</span> | 
|  | <span class="sd">        20    1</span> | 
|  | <span class="sd">        20    2</span> | 
|  | <span class="sd">        Name: x, dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.get(['x', 'y'])</span> | 
|  | <span class="sd">            x  y</span> | 
|  | <span class="sd">        10  0  a</span> | 
|  | <span class="sd">        20  1  b</span> | 
|  | <span class="sd">        20  2  b</span> | 
|  |  | 
|  | <span class="sd">        >>> df.x.get(10)</span> | 
|  | <span class="sd">        0</span> | 
|  |  | 
|  | <span class="sd">        >>> df.x.get(20)</span> | 
|  | <span class="sd">        20    1</span> | 
|  | <span class="sd">        20    2</span> | 
|  | <span class="sd">        Name: x, dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> df.x.get(15, -1)</span> | 
|  | <span class="sd">        -1</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> | 
|  | <span class="k">except</span> <span class="p">(</span><span class="ne">KeyError</span><span class="p">,</span> <span class="ne">ValueError</span><span class="p">,</span> <span class="ne">IndexError</span><span class="p">):</span> | 
|  | <span class="k">return</span> <span class="n">default</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">squeeze</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">"DataFrame"</span><span class="p">,</span> <span class="s2">"Series"</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Squeeze 1 dimensional axis objects into scalars.</span> | 
|  |  | 
|  | <span class="sd">        Series or DataFrames with a single element are squeezed to a scalar.</span> | 
|  | <span class="sd">        DataFrames with a single column or a single row are squeezed to a</span> | 
|  | <span class="sd">        Series. Otherwise the object is unchanged.</span> | 
|  |  | 
|  | <span class="sd">        This method is most useful when you don't know if your</span> | 
|  | <span class="sd">        object is a Series or DataFrame, but you do know it has just a single</span> | 
|  | <span class="sd">        column. In that case you can safely call `squeeze` to ensure you have a</span> | 
|  | <span class="sd">        Series.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {0 or 'index', 1 or 'columns', None}, default None</span> | 
|  | <span class="sd">            A specific axis to squeeze. By default, all length-1 axes are</span> | 
|  | <span class="sd">            squeezed.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame, Series, or scalar</span> | 
|  | <span class="sd">            The projection after squeezing `axis` or all the axes.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        Series.iloc: Integer-location based indexing for selecting scalars.</span> | 
|  | <span class="sd">        DataFrame.iloc: Integer-location based indexing for selecting Series.</span> | 
|  | <span class="sd">        Series.to_frame: Inverse of DataFrame.squeeze for a</span> | 
|  | <span class="sd">            single-column DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> primes = ps.Series([2, 3, 5, 7])</span> | 
|  |  | 
|  | <span class="sd">        Slicing might produce a Series with a single value:</span> | 
|  |  | 
|  | <span class="sd">        >>> even_primes = primes[primes % 2 == 0]</span> | 
|  | <span class="sd">        >>> even_primes</span> | 
|  | <span class="sd">        0    2</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> even_primes.squeeze()</span> | 
|  | <span class="sd">        2</span> | 
|  |  | 
|  | <span class="sd">        Squeezing objects with more than one value in every axis does nothing:</span> | 
|  |  | 
|  | <span class="sd">        >>> odd_primes = primes[primes % 2 == 1]</span> | 
|  | <span class="sd">        >>> odd_primes</span> | 
|  | <span class="sd">        1    3</span> | 
|  | <span class="sd">        2    5</span> | 
|  | <span class="sd">        3    7</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> odd_primes.squeeze()</span> | 
|  | <span class="sd">        1    3</span> | 
|  | <span class="sd">        2    5</span> | 
|  | <span class="sd">        3    7</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        Squeezing is even more effective when used with DataFrames.</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">           a  b</span> | 
|  | <span class="sd">        0  1  2</span> | 
|  | <span class="sd">        1  3  4</span> | 
|  |  | 
|  | <span class="sd">        Slicing a single column will produce a DataFrame with the columns</span> | 
|  | <span class="sd">        having only one value:</span> | 
|  |  | 
|  | <span class="sd">        >>> df_a = df[['a']]</span> | 
|  | <span class="sd">        >>> df_a</span> | 
|  | <span class="sd">           a</span> | 
|  | <span class="sd">        0  1</span> | 
|  | <span class="sd">        1  3</span> | 
|  |  | 
|  | <span class="sd">        The columns can be squeezed down, resulting in a Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df_a.squeeze('columns')</span> | 
|  | <span class="sd">        0    1</span> | 
|  | <span class="sd">        1    3</span> | 
|  | <span class="sd">        Name: a, dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        Slicing a single row from a single column will produce a single</span> | 
|  | <span class="sd">        scalar DataFrame:</span> | 
|  |  | 
|  | <span class="sd">        >>> df_1a = df.loc[[1], ['a']]</span> | 
|  | <span class="sd">        >>> df_1a</span> | 
|  | <span class="sd">           a</span> | 
|  | <span class="sd">        1  3</span> | 
|  |  | 
|  | <span class="sd">        Squeezing the rows produces a single scalar Series:</span> | 
|  |  | 
|  | <span class="sd">        >>> df_1a.squeeze('rows')</span> | 
|  | <span class="sd">        a    3</span> | 
|  | <span class="sd">        Name: 1, dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        Squeezing all axes will project directly into a scalar:</span> | 
|  |  | 
|  | <span class="sd">        >>> df_1a.squeeze()</span> | 
|  | <span class="sd">        3</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="s2">"index"</span> <span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="s2">"rows"</span> <span class="k">else</span> <span class="n">axis</span> | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">first_series</span> | 
|  |  | 
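|  | <span class="c1"># Check for a single column / a single row by looking at no more than two items,</span> | 
|  | <span class="c1"># so the check stays cheap instead of counting the whole dataset.</span> | 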
|  | <span class="n">is_squeezable</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">columns</span><span class="p">[:</span><span class="mi">2</span><span class="p">])</span> <span class="o">==</span> <span class="mi">1</span> | 
|  | <span class="c1"># If DataFrame has multiple columns, there is no change.</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="n">is_squeezable</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="bp">self</span> | 
|  | <span class="n">series_from_column</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> | 
|  | <span class="n">has_single_value</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">series_from_column</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">2</span><span class="p">))</span> <span class="o">==</span> <span class="mi">1</span> | 
|  | <span class="c1"># If DataFrame has only a single value, use pandas API directly.</span> | 
|  | <span class="k">if</span> <span class="n">has_single_value</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">)</span> <span class="k">else</span> <span class="n">result</span> | 
|  | <span class="k">elif</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="bp">self</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">series_from_column</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="c1"># The case of Series is simple.</span> | 
|  | <span class="c1"># If Series has only a single value, just return it as a scalar.</span> | 
|  | <span class="c1"># Otherwise, there is no change.</span> | 
|  | <span class="n">self_top_two</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="s2">"Series"</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> | 
|  | <span class="n">has_single_value</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">self_top_two</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> | 
|  | <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">],</span> <span class="n">self_top_two</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">has_single_value</span> <span class="k">else</span> <span class="bp">self</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">truncate</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">before</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">after</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">copy</span><span class="p">:</span> <span class="n">bool_type</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">DataFrameOrSeries</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Truncate a Series or DataFrame before and after some index value.</span> | 
|  |  | 
|  | <span class="sd">        This is a useful shorthand for boolean indexing based on index</span> | 
|  | <span class="sd">        values above or below certain thresholds.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This API is dependent on :meth:`Index.is_monotonic_increasing`</span> | 
|  | <span class="sd">            which can be expensive.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        before: date, str, int</span> | 
|  | <span class="sd">            Truncate all rows before this index value.</span> | 
|  | <span class="sd">        after: date, str, int</span> | 
|  | <span class="sd">            Truncate all rows after this index value.</span> | 
|  | <span class="sd">        axis: {0 or 'index', 1 or 'columns'}, optional</span> | 
|  | <span class="sd">            Axis to truncate. Truncates the index (rows) by default.</span> | 
|  | <span class="sd">        copy: bool, default is True,</span> | 
|  | <span class="sd">            Return a copy of the truncated section.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        type of caller</span> | 
|  | <span class="sd">            The truncated Series or DataFrame.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        DataFrame.loc: Select a subset of a DataFrame by label.</span> | 
|  | <span class="sd">        DataFrame.iloc: Select a subset of a DataFrame by position.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> df = ps.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],</span> | 
|  | <span class="sd">        ...                    'B': ['f', 'g', 'h', 'i', 'j'],</span> | 
|  | <span class="sd">        ...                    'C': ['k', 'l', 'm', 'n', 'o']},</span> | 
|  | <span class="sd">        ...                   index=[1, 2, 3, 4, 5])</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">           A  B  C</span> | 
|  | <span class="sd">        1  a  f  k</span> | 
|  | <span class="sd">        2  b  g  l</span> | 
|  | <span class="sd">        3  c  h  m</span> | 
|  | <span class="sd">        4  d  i  n</span> | 
|  | <span class="sd">        5  e  j  o</span> | 
|  |  | 
|  | <span class="sd">        >>> df.truncate(before=2, after=4)</span> | 
|  | <span class="sd">           A  B  C</span> | 
|  | <span class="sd">        2  b  g  l</span> | 
|  | <span class="sd">        3  c  h  m</span> | 
|  | <span class="sd">        4  d  i  n</span> | 
|  |  | 
|  | <span class="sd">        The columns of a DataFrame can be truncated.</span> | 
|  |  | 
|  | <span class="sd">        >>> df.truncate(before="A", after="B", axis="columns")</span> | 
|  | <span class="sd">           A  B</span> | 
|  | <span class="sd">        1  a  f</span> | 
|  | <span class="sd">        2  b  g</span> | 
|  | <span class="sd">        3  c  h</span> | 
|  | <span class="sd">        4  d  i</span> | 
|  | <span class="sd">        5  e  j</span> | 
|  |  | 
|  | <span class="sd">        For Series, only rows can be truncated.</span> | 
|  |  | 
|  | <span class="sd">        >>> df['A'].truncate(before=2, after=4)</span> | 
|  | <span class="sd">        2    b</span> | 
|  | <span class="sd">        3    c</span> | 
|  | <span class="sd">        4    d</span> | 
|  | <span class="sd">        Name: A, dtype: object</span> | 
|  |  | 
|  | <span class="sd">        A Series has index that sorted integers.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([10, 20, 30, 40, 50, 60, 70],</span> | 
|  | <span class="sd">        ...               index=[1, 2, 3, 4, 5, 6, 7])</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        1    10</span> | 
|  | <span class="sd">        2    20</span> | 
|  | <span class="sd">        3    30</span> | 
|  | <span class="sd">        4    40</span> | 
|  | <span class="sd">        5    50</span> | 
|  | <span class="sd">        6    60</span> | 
|  | <span class="sd">        7    70</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.truncate(2, 5)</span> | 
|  | <span class="sd">        2    20</span> | 
|  | <span class="sd">        3    30</span> | 
|  | <span class="sd">        4    40</span> | 
|  | <span class="sd">        5    50</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        A Series has index that sorted strings.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([10, 20, 30, 40, 50, 60, 70],</span> | 
|  | <span class="sd">        ...               index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        a    10</span> | 
|  | <span class="sd">        b    20</span> | 
|  | <span class="sd">        c    30</span> | 
|  | <span class="sd">        d    40</span> | 
|  | <span class="sd">        e    50</span> | 
|  | <span class="sd">        f    60</span> | 
|  | <span class="sd">        g    70</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  |  | 
|  | <span class="sd">        >>> s.truncate('b', 'e')</span> | 
|  | <span class="sd">        b    20</span> | 
|  | <span class="sd">        c    30</span> | 
|  | <span class="sd">        d    40</span> | 
|  | <span class="sd">        e    50</span> | 
|  | <span class="sd">        dtype: int64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">first_series</span> | 
|  |  | 
|  | <span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span> | 
|  | <span class="n">indexes</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span> | 
|  | <span class="n">indexes_increasing</span> <span class="o">=</span> <span class="n">indexes</span><span class="o">.</span><span class="n">is_monotonic_increasing</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="n">indexes_increasing</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">indexes</span><span class="o">.</span><span class="n">is_monotonic_decreasing</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"truncate requires a sorted index"</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="p">(</span><span class="n">before</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">after</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span> | 
|  | <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">Union</span><span class="p">[</span><span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="k">if</span> <span class="n">copy</span> <span class="k">else</span> <span class="bp">self</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="p">(</span><span class="n">before</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">after</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="n">before</span> <span class="o">></span> <span class="n">after</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Truncate: </span><span class="si">%s</span><span class="s2"> must be after </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">after</span><span class="p">,</span> <span class="n">before</span><span class="p">))</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="n">indexes_increasing</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">before</span><span class="p">:</span><span class="n">after</span><span class="p">]</span>  <span class="c1"># type: ignore[arg-type]</span> | 
|  | <span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">after</span><span class="p">:</span><span class="n">before</span><span class="p">]</span>  <span class="c1"># type: ignore[arg-type]</span> | 
|  | <span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> | 
|  | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span> | 
|  | <span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | 
|  | <span class="k">if</span> <span class="n">indexes_increasing</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">before</span><span class="p">:</span><span class="n">after</span><span class="p">]</span>  <span class="c1"># type: ignore[assignment]</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">after</span><span class="p">:</span><span class="n">before</span><span class="p">]</span>  <span class="c1"># type: ignore[assignment]</span> | 
|  | <span class="k">elif</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> | 
|  | <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="n">before</span><span class="p">:</span><span class="n">after</span><span class="p">]</span>  <span class="c1"># type: ignore[assignment]</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">DataFrameOrSeries</span><span class="p">,</span> <span class="n">result</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="k">if</span> <span class="n">copy</span> <span class="k">else</span> <span class="n">result</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">to_markdown</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="n">buf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">IO</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Print Series or DataFrame in Markdown-friendly format.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: This method should only be used if the resulting pandas object is expected</span> | 
|  | <span class="sd">                  to be small, as all the data is loaded into the driver's memory.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        buf: writable buffer, defaults to sys.stdout</span> | 
|  | <span class="sd">            Where to send the output. By default, the output is printed to</span> | 
|  | <span class="sd">            sys.stdout. Pass a writable buffer if you need to further process</span> | 
|  | <span class="sd">            the output.</span> | 
|  | <span class="sd">        mode: str, optional</span> | 
|  | <span class="sd">            Mode in which file is opened.</span> | 
|  | <span class="sd">        **kwargs</span> | 
|  | <span class="sd">            These parameters will be passed to `tabulate`.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        str</span> | 
|  | <span class="sd">            Series or DataFrame in Markdown-friendly format.</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> psser = ps.Series(["elk", "pig", "dog", "quetzal"], name="animal")</span> | 
|  | <span class="sd">        >>> print(psser.to_markdown())  # doctest: +SKIP</span> | 
|  | <span class="sd">        |    | animal   |</span> | 
|  | <span class="sd">        |---:|:---------|</span> | 
|  | <span class="sd">        |  0 | elk      |</span> | 
|  | <span class="sd">        |  1 | pig      |</span> | 
|  | <span class="sd">        |  2 | dog      |</span> | 
|  | <span class="sd">        |  3 | quetzal  |</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf = ps.DataFrame(</span> | 
|  | <span class="sd">        ...     data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}</span> | 
|  | <span class="sd">        ... )</span> | 
|  | <span class="sd">        >>> print(psdf.to_markdown())  # doctest: +SKIP</span> | 
|  | <span class="sd">        |    | animal_1   | animal_2   |</span> | 
|  | <span class="sd">        |---:|:-----------|:-----------|</span> | 
|  | <span class="sd">        |  0 | elk        | dog        |</span> | 
|  | <span class="sd">        |  1 | pig        | quetzal    |</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">log_advice</span><span class="p">(</span> | 
|  | <span class="s2">"`to_markdown` loads all data into the driver's memory. "</span> | 
|  | <span class="s2">"It should only be used if the resulting pandas object is expected to be small."</span> | 
|  | <span class="p">)</span> | 
|  | <span class="c1"># Make sure locals() call is at the top of the function so we don't capture local variables.</span> | 
|  | <span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span> | 
|  | <span class="n">psser_or_psdf</span> <span class="o">=</span> <span class="bp">self</span> | 
|  | <span class="n">internal_pandas</span> <span class="o">=</span> <span class="n">psser_or_psdf</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span> | 
|  | <span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span> | 
|  | <span class="n">internal_pandas</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_markdown</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">internal_pandas</span><span class="p">)</span><span class="o">.</span><span class="n">to_markdown</span><span class="p">,</span> <span class="n">args</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@abstractmethod</span> | 
|  | <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">method</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inplace</span><span class="p">:</span> <span class="n">bool_type</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'downcast' when value parameter exists</span> | 
|  | <span class="k">def</span> <span class="nf">bfill</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inplace</span><span class="p">:</span> <span class="n">bool_type</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Synonym for `DataFrame.fillna()` or `Series.fillna()` with ``method=`bfill```.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of 'bfill' uses Spark's Window</span> | 
|  | <span class="sd">            without specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {0 or `index`}</span> | 
|  | <span class="sd">            1 and `columns` are not supported.</span> | 
|  | <span class="sd">        inplace: boolean, default False</span> | 
|  | <span class="sd">            Fill in place (do not create a new object)</span> | 
|  | <span class="sd">        limit: int, default None</span> | 
|  | <span class="sd">            If method is specified, this is the maximum number of consecutive NaN values to</span> | 
|  | <span class="sd">            forward/backward fill. In other words, if there is a gap with more than this number of</span> | 
|  | <span class="sd">            consecutive NaNs, it will only be partially filled. If method is not specified,</span> | 
|  | <span class="sd">            this is the maximum number of entries along the entire axis where NaNs will be filled.</span> | 
|  | <span class="sd">            Must be greater than 0 if not None</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  | <span class="sd">            DataFrame or Series with NA entries filled.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({</span> | 
|  | <span class="sd">        ...     'A': [None, 3, None, None],</span> | 
|  | <span class="sd">        ...     'B': [2, 4, None, 3],</span> | 
|  | <span class="sd">        ...     'C': [None, None, None, 1],</span> | 
|  | <span class="sd">        ...     'D': [0, 1, 5, 4]</span> | 
|  | <span class="sd">        ...     },</span> | 
|  | <span class="sd">        ...     columns=['A', 'B', 'C', 'D'])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             A    B    C  D</span> | 
|  | <span class="sd">        0  NaN  2.0  NaN  0</span> | 
|  | <span class="sd">        1  3.0  4.0  NaN  1</span> | 
|  | <span class="sd">        2  NaN  NaN  NaN  5</span> | 
|  | <span class="sd">        3  NaN  3.0  1.0  4</span> | 
|  |  | 
|  | <span class="sd">        Propagate non-null values backward.</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.bfill()</span> | 
|  | <span class="sd">             A    B    C  D</span> | 
|  | <span class="sd">        0  3.0  2.0  1.0  0</span> | 
|  | <span class="sd">        1  3.0  4.0  1.0  1</span> | 
|  | <span class="sd">        2  NaN  3.0  1.0  5</span> | 
|  | <span class="sd">        3  NaN  3.0  1.0  4</span> | 
|  |  | 
|  | <span class="sd">        For Series</span> | 
|  |  | 
|  | <span class="sd">        >>> psser = ps.Series([None, None, None, 1])</span> | 
|  | <span class="sd">        >>> psser</span> | 
|  | <span class="sd">        0    NaN</span> | 
|  | <span class="sd">        1    NaN</span> | 
|  | <span class="sd">        2    NaN</span> | 
|  | <span class="sd">        3    1.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> psser.bfill()</span> | 
|  | <span class="sd">        0    1.0</span> | 
|  | <span class="sd">        1    1.0</span> | 
|  | <span class="sd">        2    1.0</span> | 
|  | <span class="sd">        3    1.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">"bfill"</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">)</span> | 
|  |  | 
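|  | <span class="c1"># 'backfill' is kept as an alias of 'bfill' for pandas API parity.</span> | 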
|  | <span class="n">backfill</span> <span class="o">=</span> <span class="n">bfill</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'downcast' when value parameter exists</span> | 
|  | <span class="k">def</span> <span class="nf">ffill</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inplace</span><span class="p">:</span> <span class="n">bool_type</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Synonym for `DataFrame.fillna()` or `Series.fillna()` with ``method=`ffill```.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of 'ffill' uses Spark's Window</span> | 
|  | <span class="sd">            without specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single a partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        axis: {0 or `index`}</span> | 
|  | <span class="sd">            1 and `columns` are not supported.</span> | 
|  | <span class="sd">        inplace: boolean, default False</span> | 
|  | <span class="sd">            Fill in place (do not create a new object)</span> | 
|  | <span class="sd">        limit: int, default None</span> | 
|  | <span class="sd">            If method is specified, this is the maximum number of consecutive NaN values to</span> | 
|  | <span class="sd">            forward/backward fill. In other words, if there is a gap with more than this number of</span> | 
|  | <span class="sd">            consecutive NaNs, it will only be partially filled. If method is not specified,</span> | 
|  | <span class="sd">            this is the maximum number of entries along the entire axis where NaNs will be filled.</span> | 
|  | <span class="sd">            Must be greater than 0 if not None</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        DataFrame or Series</span> | 
|  | <span class="sd">            DataFrame or Series with NA entries filled.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> psdf = ps.DataFrame({</span> | 
|  | <span class="sd">        ...     'A': [None, 3, None, None],</span> | 
|  | <span class="sd">        ...     'B': [2, 4, None, 3],</span> | 
|  | <span class="sd">        ...     'C': [None, None, None, 1],</span> | 
|  | <span class="sd">        ...     'D': [0, 1, 5, 4]</span> | 
|  | <span class="sd">        ...     },</span> | 
|  | <span class="sd">        ...     columns=['A', 'B', 'C', 'D'])</span> | 
|  | <span class="sd">        >>> psdf</span> | 
|  | <span class="sd">             A    B    C  D</span> | 
|  | <span class="sd">        0  NaN  2.0  NaN  0</span> | 
|  | <span class="sd">        1  3.0  4.0  NaN  1</span> | 
|  | <span class="sd">        2  NaN  NaN  NaN  5</span> | 
|  | <span class="sd">        3  NaN  3.0  1.0  4</span> | 
|  |  | 
|  | <span class="sd">        Propagate non-null values forward.</span> | 
|  |  | 
|  | <span class="sd">        >>> psdf.ffill()</span> | 
|  | <span class="sd">             A    B    C  D</span> | 
|  | <span class="sd">        0  NaN  2.0  NaN  0</span> | 
|  | <span class="sd">        1  3.0  4.0  NaN  1</span> | 
|  | <span class="sd">        2  3.0  4.0  NaN  5</span> | 
|  | <span class="sd">        3  3.0  3.0  1.0  4</span> | 
|  |  | 
|  | <span class="sd">        For Series</span> | 
|  |  | 
|  | <span class="sd">        >>> psser = ps.Series([2, 4, None, 3])</span> | 
|  | <span class="sd">        >>> psser</span> | 
|  | <span class="sd">        0    2.0</span> | 
|  | <span class="sd">        1    4.0</span> | 
|  | <span class="sd">        2    NaN</span> | 
|  | <span class="sd">        3    3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        >>> psser.ffill()</span> | 
|  | <span class="sd">        0    2.0</span> | 
|  | <span class="sd">        1    4.0</span> | 
|  | <span class="sd">        2    4.0</span> | 
|  | <span class="sd">        3    3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">"ffill"</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">)</span> | 
|  |  | 
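|  | <span class="c1"># 'pad' is kept as an alias of 'ffill' for pandas API parity.</span> | 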
|  | <span class="n">pad</span> <span class="o">=</span> <span class="n">ffill</span> | 
|  |  | 
|  | <span class="c1"># TODO: add 'axis', 'inplace', 'downcast'</span> | 
|  | <span class="k">def</span> <span class="nf">interpolate</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> | 
|  | <span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"linear"</span><span class="p">,</span> | 
|  | <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">limit_direction</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">limit_area</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">FrameLike</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Fill NaN values using an interpolation method.</span> | 
|  |  | 
|  | <span class="sd">        .. note:: the current implementation of interpolate uses Spark's Window without</span> | 
|  | <span class="sd">            specifying partition specification. This leads to moveing all data into a</span> | 
|  | <span class="sd">            single partition in a single machine and could cause serious</span> | 
|  | <span class="sd">            performance degradation. Avoid this method with very large datasets.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 3.4.0</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        method: str, default 'linear'</span> | 
|  | <span class="sd">            Interpolation technique to use. One of:</span> | 
|  |  | 
|  | <span class="sd">            * 'linear': Ignore the index and treat the values as equally</span> | 
|  | <span class="sd">              spaced.</span> | 
|  |  | 
|  | <span class="sd">        limit: int, optional</span> | 
|  | <span class="sd">            Maximum number of consecutive NaNs to fill. Must be greater than</span> | 
|  | <span class="sd">            0.</span> | 
|  |  | 
|  | <span class="sd">        limit_direction: str, default None</span> | 
|  | <span class="sd">            Consecutive NaNs will be filled in this direction.</span> | 
|  | <span class="sd">            One of {{'forward', 'backward', 'both'}}.</span> | 
|  |  | 
|  | <span class="sd">        limit_area: str, default None</span> | 
|  | <span class="sd">            If limit is specified, consecutive NaNs will be filled with this restriction. One of:</span> | 
|  |  | 
|  | <span class="sd">            * None: No fill restriction.</span> | 
|  | <span class="sd">            * 'inside': Only fill NaNs surrounded by valid values (interpolate).</span> | 
|  | <span class="sd">            * 'outside': Only fill NaNs outside valid values (extrapolate).</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        Series or DataFrame or None</span> | 
|  | <span class="sd">            Returns the same object type as the caller, interpolated at</span> | 
|  | <span class="sd">            some or all NA values.</span> | 
|  |  | 
|  | <span class="sd">        See Also</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        fillna: Fill missing values using different methods.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        Filling in NA via linear interpolation.</span> | 
|  |  | 
|  | <span class="sd">        >>> s = ps.Series([0, 1, np.nan, 3])</span> | 
|  | <span class="sd">        >>> s</span> | 
|  | <span class="sd">        0    0.0</span> | 
|  | <span class="sd">        1    1.0</span> | 
|  | <span class="sd">        2    NaN</span> | 
|  | <span class="sd">        3    3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  | <span class="sd">        >>> s.interpolate()</span> | 
|  | <span class="sd">        0    0.0</span> | 
|  | <span class="sd">        1    1.0</span> | 
|  | <span class="sd">        2    2.0</span> | 
|  | <span class="sd">        3    3.0</span> | 
|  | <span class="sd">        dtype: float64</span> | 
|  |  | 
|  | <span class="sd">        Fill the DataFrame forward (that is, going down) along each column</span> | 
|  | <span class="sd">        using linear interpolation.</span> | 
|  |  | 
|  | <span class="sd">        Note how the last entry in column 'a' is interpolated differently,</span> | 
|  | <span class="sd">        because there is no entry after it to use for interpolation.</span> | 
|  | <span class="sd">        Note how the first entry in column 'b' remains NA, because there</span> | 
|  | <span class="sd">        is no entry before it to use for interpolation.</span> | 
|  |  | 
|  | <span class="sd">        >>> df = ps.DataFrame([(0.0, np.nan, -1.0, 1.0),</span> | 
|  | <span class="sd">        ...                    (np.nan, 2.0, np.nan, np.nan),</span> | 
|  | <span class="sd">        ...                    (2.0, 3.0, np.nan, 9.0),</span> | 
|  | <span class="sd">        ...                    (np.nan, 4.0, -4.0, 16.0)],</span> | 
|  | <span class="sd">        ...                   columns=list('abcd'))</span> | 
|  | <span class="sd">        >>> df</span> | 
|  | <span class="sd">             a    b    c     d</span> | 
|  | <span class="sd">        0  0.0  NaN -1.0   1.0</span> | 
|  | <span class="sd">        1  NaN  2.0  NaN   NaN</span> | 
|  | <span class="sd">        2  2.0  3.0  NaN   9.0</span> | 
|  | <span class="sd">        3  NaN  4.0 -4.0  16.0</span> | 
|  | <span class="sd">        >>> df.interpolate(method='linear')</span> | 
|  | <span class="sd">             a    b    c     d</span> | 
|  | <span class="sd">        0  0.0  NaN -1.0   1.0</span> | 
|  | <span class="sd">        1  1.0  2.0 -2.0   5.0</span> | 
|  | <span class="sd">        2  2.0  3.0 -3.0   9.0</span> | 
|  | <span class="sd">        3  2.0  4.0 -4.0  16.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">interpolate</span><span class="p">(</span> | 
|  | <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">,</span> <span class="n">limit_direction</span><span class="o">=</span><span class="n">limit_direction</span><span class="p">,</span> <span class="n">limit_area</span><span class="o">=</span><span class="n">limit_area</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">at</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">AtIndexer</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">AtIndexer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">at</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">AtIndexer</span><span class="o">.</span><span class="vm">__doc__</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">iat</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">iAtIndexer</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">iAtIndexer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">iat</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">iAtIndexer</span><span class="o">.</span><span class="vm">__doc__</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">iloc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">iLocIndexer</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">iLocIndexer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">iloc</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">iLocIndexer</span><span class="o">.</span><span class="vm">__doc__</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="k">def</span> <span class="nf">loc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">LocIndexer</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">LocIndexer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">loc</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">LocIndexer</span><span class="o">.</span><span class="vm">__doc__</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__bool__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">NoReturn</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> | 
|  | <span class="s2">"The truth value of a </span><span class="si">{0}</span><span class="s2"> is ambiguous. "</span> | 
|  | <span class="s2">"Use a.empty, a.bool(), a.item(), a.any() or a.all()."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@staticmethod</span> | 
|  | <span class="k">def</span> <span class="nf">_count_expr</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="s2">"Series"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Column</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">_dtype_op</span><span class="o">.</span><span class="n">nan_to_null</span><span class="p">(</span><span class="n">psser</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="kn">import</span> <span class="nn">os</span> | 
|  | <span class="kn">import</span> <span class="nn">doctest</span> | 
|  | <span class="kn">import</span> <span class="nn">shutil</span> | 
|  | <span class="kn">import</span> <span class="nn">sys</span> | 
|  | <span class="kn">import</span> <span class="nn">tempfile</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span> | 
|  | <span class="kn">import</span> <span class="nn">pyspark.pandas.generic</span> | 
|  |  | 
|  | <span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">"SPARK_HOME"</span><span class="p">])</span> | 
|  |  | 
|  | <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">generic</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"ps"</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span> | 
|  | <span class="n">spark</span> <span class="o">=</span> <span class="p">(</span> | 
|  | <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[4]"</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"pyspark.pandas.generic tests"</span><span class="p">)</span> | 
|  | <span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">path</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"path"</span><span class="p">]</span> <span class="o">=</span> <span class="n">path</span> | 
|  |  | 
|  | <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span> | 
|  | <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">generic</span><span class="p">,</span> | 
|  | <span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> | 
|  | <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  | <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> | 
|  | <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> | 
|  | <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> | 
|  | <span class="n">_test</span><span class="p">()</span> | 
|  | </pre></div> | 
|  |  | 
|  | </article> | 
|  |  | 
|  |  | 
|  |  | 
|  | <footer class="bd-footer-article"> | 
|  |  | 
|  | <div class="footer-article-items footer-article__inner"> | 
|  |  | 
|  | <div class="footer-article-item"><!-- Previous / next buttons --> | 
|  | <div class="prev-next-area"> | 
|  | </div></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | </div> | 
|  | <footer class="bd-footer-content"> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </main> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <!-- Scripts loaded after <body> so the DOM is not blocked --> | 
|  | <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> | 
|  | <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> | 
|  |  | 
|  | <footer class="bd-footer"> | 
|  | <div class="bd-footer__inner bd-page-width"> | 
|  |  | 
|  | <div class="footer-items__start"> | 
|  |  | 
|  | <div class="footer-item"><p class="copyright"> | 
|  | Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. | 
|  | </p></div> | 
|  |  | 
|  | <div class="footer-item"> | 
|  | <p class="sphinx-version"> | 
|  | Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. | 
|  | <br/> | 
|  | </p> | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="footer-items__end"> | 
|  |  | 
|  | <div class="footer-item"><p class="theme-version"> | 
|  | Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. | 
|  | </p></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  | </body> | 
|  | </html> |