#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
String functions on pandas-on-Spark Series
"""
from typing import (
Any,
Callable,
Dict,
List,
Optional,
Union,
TYPE_CHECKING,
cast,
no_type_check,
)
import numpy as np
import pandas as pd
from pyspark.sql.types import StringType, BinaryType, ArrayType, LongType, MapType
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf
from pyspark.pandas.spark import functions as SF
if TYPE_CHECKING:
import pyspark.pandas as ps # noqa: F401 (SPARK-34943)
class StringMethods(object):
"""String methods for pandas-on-Spark Series"""
def __init__(self, series: "ps.Series"):
if not isinstance(series.spark.data_type, (StringType, BinaryType, ArrayType)):
raise ValueError("Cannot call StringMethods on type {}".format(series.spark.data_type))
self._data = series
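# A minimal usage sketch (assumes an active Spark session; the sample
# values are illustrative). These methods are reached through the
# Series.str accessor:
#
#   import pyspark.pandas as ps
#   s = ps.Series(["a", "Bb"])
#   s.str.upper()  # dispatches to StringMethods.upper below
#   s.str.len()    # also defined for BinaryType and ArrayType data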
# Methods
def capitalize(self) -> "ps.Series":
"""
Convert strings in the Series to be capitalized.
Examples
--------
>>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0 lower
1 CAPITALS
2 this is a sentence
3 SwApCaSe
dtype: object
>>> s.str.capitalize()
0 Lower
1 Capitals
2 This is a sentence
3 Swapcase
dtype: object
"""
@no_type_check
def pandas_capitalize(s) -> "ps.Series[str]":
return s.str.capitalize()
return self._data.pandas_on_spark.transform_batch(pandas_capitalize)
def title(self) -> "ps.Series":
"""
Convert strings in the Series to titlecase.
Examples
--------
>>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0 lower
1 CAPITALS
2 this is a sentence
3 SwApCaSe
dtype: object
>>> s.str.title()
0 Lower
1 Capitals
2 This Is A Sentence
3 Swapcase
dtype: object
"""
@no_type_check
def pandas_title(s) -> "ps.Series[str]":
return s.str.title()
return self._data.pandas_on_spark.transform_batch(pandas_title)
def lower(self) -> "ps.Series":
"""
Convert strings in the Series/Index to all lowercase.
Examples
--------
>>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0 lower
1 CAPITALS
2 this is a sentence
3 SwApCaSe
dtype: object
>>> s.str.lower()
0 lower
1 capitals
2 this is a sentence
3 swapcase
dtype: object
"""
return self._data.spark.transform(F.lower)
def upper(self) -> "ps.Series":
"""
Convert strings in the Series/Index to all uppercase.
Examples
--------
>>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0 lower
1 CAPITALS
2 this is a sentence
3 SwApCaSe
dtype: object
>>> s.str.upper()
0 LOWER
1 CAPITALS
2 THIS IS A SENTENCE
3 SWAPCASE
dtype: object
"""
return self._data.spark.transform(F.upper)
def swapcase(self) -> "ps.Series":
"""
Convert strings in the Series/Index to be swapcased.
Examples
--------
>>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
>>> s
0 lower
1 CAPITALS
2 this is a sentence
3 SwApCaSe
dtype: object
>>> s.str.swapcase()
0 LOWER
1 capitals
2 THIS IS A SENTENCE
3 sWaPcAsE
dtype: object
"""
@no_type_check
def pandas_swapcase(s) -> "ps.Series[str]":
return s.str.swapcase()
return self._data.pandas_on_spark.transform_batch(pandas_swapcase)
def startswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series":
"""
Test if the start of each string element matches a pattern.
Equivalent to :func:`str.startswith`.
Parameters
----------
pattern : str
Character sequence. Regular expressions are not accepted.
na : object, default None
Object shown if element is not a string. NaN converted to None.
Returns
-------
Series of bool or object
pandas-on-Spark Series of booleans indicating whether the given pattern
matches the start of each string element.
Examples
--------
>>> s = ps.Series(['bat', 'Bear', 'cat', np.nan])
>>> s
0 bat
1 Bear
2 cat
3 None
dtype: object
>>> s.str.startswith('b')
0 True
1 False
2 False
3 None
dtype: object
Specifying na to be False instead of None.
>>> s.str.startswith('b', na=False)
0 True
1 False
2 False
3 False
dtype: bool
"""
@no_type_check
def pandas_startswith(s) -> "ps.Series[bool]":
return s.str.startswith(pattern, na)
return self._data.pandas_on_spark.transform_batch(pandas_startswith)
def endswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series":
"""
Test if the end of each string element matches a pattern.
Equivalent to :func:`str.endswith`.
Parameters
----------
pattern : str
Character sequence. Regular expressions are not accepted.
na : object, default None
Object shown if element is not a string. NaN converted to None.
Returns
-------
Series of bool or object
pandas-on-Spark Series of booleans indicating whether the given pattern
matches the end of each string element.
Examples
--------
>>> s = ps.Series(['bat', 'Bear', 'cat', np.nan])
>>> s
0 bat
1 Bear
2 cat
3 None
dtype: object
>>> s.str.endswith('t')
0 True
1 False
2 True
3 None
dtype: object
Specifying na to be False instead of None.
>>> s.str.endswith('t', na=False)
0 True
1 False
2 True
3 False
dtype: bool
"""
@no_type_check
def pandas_endswith(s) -> "ps.Series[bool]":
return s.str.endswith(pattern, na)
return self._data.pandas_on_spark.transform_batch(pandas_endswith)
def strip(self, to_strip: Optional[str] = None) -> "ps.Series":
"""
Remove leading and trailing characters.
Strip whitespaces (including newlines) or a set of specified
characters from each string in the Series/Index from left and
right sides. Equivalent to :func:`str.strip`.
Parameters
----------
to_strip : str
Specifying the set of characters to be removed. All combinations
of this set of characters will be stripped. If None then
whitespaces are removed.
Returns
-------
Series of objects
Examples
--------
>>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
>>> s
0 1. Ant.
1 2. Bee!\\t
2 None
dtype: object
>>> s.str.strip()
0 1. Ant.
1 2. Bee!
2 None
dtype: object
>>> s.str.strip('12.')
0 Ant
1 Bee!\\t
2 None
dtype: object
>>> s.str.strip('.!\\t')
0 1. Ant
1 2. Bee
2 None
dtype: object
"""
@no_type_check
def pandas_strip(s) -> "ps.Series[str]":
return s.str.strip(to_strip)
return self._data.pandas_on_spark.transform_batch(pandas_strip)
def lstrip(self, to_strip: Optional[str] = None) -> "ps.Series":
"""
Remove leading characters.
Strip whitespaces (including newlines) or a set of specified
characters from each string in the Series/Index from left side.
Equivalent to :func:`str.lstrip`.
Parameters
----------
to_strip : str
Specifying the set of characters to be removed. All combinations
of this set of characters will be stripped. If None then
whitespaces are removed.
Returns
-------
Series of object
Examples
--------
>>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
>>> s
0 1. Ant.
1 2. Bee!\\t
2 None
dtype: object
>>> s.str.lstrip('12.')
0 Ant.
1 Bee!\\t
2 None
dtype: object
"""
@no_type_check
def pandas_lstrip(s) -> "ps.Series[str]":
return s.str.lstrip(to_strip)
return self._data.pandas_on_spark.transform_batch(pandas_lstrip)
def rstrip(self, to_strip: Optional[str] = None) -> "ps.Series":
"""
Remove trailing characters.
Strip whitespaces (including newlines) or a set of specified
characters from each string in the Series/Index from right side.
Equivalent to :func:`str.rstrip`.
Parameters
----------
to_strip : str
Specifying the set of characters to be removed. All combinations
of this set of characters will be stripped. If None then
whitespaces are removed.
Returns
-------
Series of object
Examples
--------
>>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
>>> s
0 1. Ant.
1 2. Bee!\\t
2 None
dtype: object
>>> s.str.rstrip('.!\\t')
0 1. Ant
1 2. Bee
2 None
dtype: object
"""
@no_type_check
def pandas_rstrip(s) -> "ps.Series[str]":
return s.str.rstrip(to_strip)
return self._data.pandas_on_spark.transform_batch(pandas_rstrip)
def get(self, i: int) -> "ps.Series":
"""
Extract element from each string or string list/tuple in the Series
at the specified position.
Parameters
----------
i : int
Position of element to extract.
Returns
-------
Series of objects
Examples
--------
>>> s1 = ps.Series(["String", "123"])
>>> s1
0 String
1 123
dtype: object
>>> s1.str.get(1)
0 t
1 2
dtype: object
>>> s1.str.get(-1)
0 g
1 3
dtype: object
>>> s2 = ps.Series([["a", "b", "c"], ["x", "y"]])
>>> s2
0 [a, b, c]
1 [x, y]
dtype: object
>>> s2.str.get(0)
0 a
1 x
dtype: object
>>> s2.str.get(2)
0 c
1 None
dtype: object
"""
@no_type_check
def pandas_get(s) -> "ps.Series[str]":
return s.str.get(i)
return self._data.pandas_on_spark.transform_batch(pandas_get)
def isalnum(self) -> "ps.Series":
"""
Check whether all characters in each string are alphanumeric.
This is equivalent to running the Python string method
:func:`str.isalnum` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s1 = ps.Series(['one', 'one1', '1', ''])
>>> s1.str.isalnum()
0 True
1 True
2 True
3 False
dtype: bool
Note that checks against characters mixed with any additional
punctuation or whitespace will evaluate to false for an alphanumeric
check.
>>> s2 = ps.Series(['A B', '1.5', '3,000'])
>>> s2.str.isalnum()
0 False
1 False
2 False
dtype: bool
"""
@no_type_check
def pandas_isalnum(s) -> "ps.Series[bool]":
return s.str.isalnum()
return self._data.pandas_on_spark.transform_batch(pandas_isalnum)
def isalpha(self) -> "ps.Series":
"""
Check whether all characters in each string are alphabetic.
This is equivalent to running the Python string method
:func:`str.isalpha` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s1 = ps.Series(['one', 'one1', '1', ''])
>>> s1.str.isalpha()
0 True
1 False
2 False
3 False
dtype: bool
"""
@no_type_check
def pandas_isalpha(s) -> "ps.Series[bool]":
return s.str.isalpha()
return self._data.pandas_on_spark.transform_batch(pandas_isalpha)
def isdigit(self) -> "ps.Series":
"""
Check whether all characters in each string are digits.
This is equivalent to running the Python string method
:func:`str.isdigit` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series(['23', '³', '⅕', ''])
The s.str.isdecimal method checks for characters used to form numbers
in base 10.
>>> s.str.isdecimal()
0 True
1 False
2 False
3 False
dtype: bool
The s.str.isdigit method is the same as s.str.isdecimal but also
includes special digits, like superscripted and subscripted digits in
unicode.
>>> s.str.isdigit()
0 True
1 True
2 False
3 False
dtype: bool
The s.str.isnumeric method is the same as s.str.isdigit but also
includes other characters that can represent quantities such as unicode
fractions.
>>> s.str.isnumeric()
0 True
1 True
2 True
3 False
dtype: bool
"""
@no_type_check
def pandas_isdigit(s) -> "ps.Series[bool]":
return s.str.isdigit()
return self._data.pandas_on_spark.transform_batch(pandas_isdigit)
def isspace(self) -> "ps.Series":
"""
Check whether all characters in each string are whitespaces.
This is equivalent to running the Python string method
:func:`str.isspace` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series([' ', '\\t\\r\\n ', ''])
>>> s.str.isspace()
0 True
1 True
2 False
dtype: bool
"""
@no_type_check
def pandas_isspace(s) -> "ps.Series[bool]":
return s.str.isspace()
return self._data.pandas_on_spark.transform_batch(pandas_isspace)
def islower(self) -> "ps.Series":
"""
Check whether all characters in each string are lowercase.
This is equivalent to running the Python string method
:func:`str.islower` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
>>> s.str.islower()
0 True
1 False
2 False
3 False
dtype: bool
"""
@no_type_check
def pandas_islower(s) -> "ps.Series[bool]":
return s.str.islower()
return self._data.pandas_on_spark.transform_batch(pandas_islower)
def isupper(self) -> "ps.Series":
"""
Check whether all characters in each string are uppercase.
This is equivalent to running the Python string method
:func:`str.isupper` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
>>> s.str.isupper()
0 False
1 False
2 True
3 False
dtype: bool
"""
@no_type_check
def pandas_isupper(s) -> "ps.Series[bool]":
return s.str.isupper()
return self._data.pandas_on_spark.transform_batch(pandas_isupper)
def istitle(self) -> "ps.Series":
"""
Check whether all characters in each string are titlecase.
This is equivalent to running the Python string method
:func:`str.istitle` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
The s.str.istitle method checks whether all words are in title
case (only the first letter of each word is capitalized).
Words are assumed to be any sequence of non-numeric characters
separated by whitespace characters.
>>> s.str.istitle()
0 False
1 True
2 False
3 False
dtype: bool
"""
@no_type_check
def pandas_istitle(s) -> "ps.Series[bool]":
return s.str.istitle()
return self._data.pandas_on_spark.transform_batch(pandas_istitle)
def isnumeric(self) -> "ps.Series":
"""
Check whether all characters in each string are numeric.
This is equivalent to running the Python string method
:func:`str.isnumeric` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s1 = ps.Series(['one', 'one1', '1', ''])
>>> s1.str.isnumeric()
0 False
1 False
2 True
3 False
dtype: bool
>>> s2 = ps.Series(['23', '³', '⅕', ''])
The s2.str.isdecimal method checks for characters used to form numbers
in base 10.
>>> s2.str.isdecimal()
0 True
1 False
2 False
3 False
dtype: bool
The s2.str.isdigit method is the same as s2.str.isdecimal but also
includes special digits, like superscripted and subscripted digits in
unicode.
>>> s2.str.isdigit()
0 True
1 True
2 False
3 False
dtype: bool
The s2.str.isnumeric method is the same as s2.str.isdigit but also
includes other characters that can represent quantities such as unicode
fractions.
>>> s2.str.isnumeric()
0 True
1 True
2 True
3 False
dtype: bool
"""
@no_type_check
def pandas_isnumeric(s) -> "ps.Series[bool]":
return s.str.isnumeric()
return self._data.pandas_on_spark.transform_batch(pandas_isnumeric)
def isdecimal(self) -> "ps.Series":
"""
Check whether all characters in each string are decimals.
This is equivalent to running the Python string method
:func:`str.isdecimal` for each element of the Series/Index.
If a string has zero characters, False is returned for that check.
Examples
--------
>>> s = ps.Series(['23', '³', '⅕', ''])
The s.str.isdecimal method checks for characters used to form numbers
in base 10.
>>> s.str.isdecimal()
0 True
1 False
2 False
3 False
dtype: bool
The s.str.isdigit method is the same as s.str.isdecimal but also
includes special digits, like superscripted and subscripted digits in
unicode.
>>> s.str.isdigit()
0 True
1 True
2 False
3 False
dtype: bool
The s.str.isnumeric method is the same as s.str.isdigit but also
includes other characters that can represent quantities such as unicode
fractions.
>>> s.str.isnumeric()
0 True
1 True
2 True
3 False
dtype: bool
"""
@no_type_check
def pandas_isdecimal(s) -> "ps.Series[bool]":
return s.str.isdecimal()
return self._data.pandas_on_spark.transform_batch(pandas_isdecimal)
@no_type_check
def cat(self, others=None, sep=None, na_rep=None, join=None) -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
def center(self, width: int, fillchar: str = " ") -> "ps.Series":
"""
Filling left and right side of strings in the Series/Index with an
additional character. Equivalent to :func:`str.center`.
Parameters
----------
width : int
Minimum width of resulting string; additional characters will be
filled with fillchar.
fillchar : str
Additional character for filling, default is whitespace.
Returns
-------
Series of objects
Examples
--------
>>> s = ps.Series(["caribou", "tiger"])
>>> s
0 caribou
1 tiger
dtype: object
>>> s.str.center(width=10, fillchar='-')
0 -caribou--
1 --tiger---
dtype: object
"""
@no_type_check
def pandas_center(s) -> "ps.Series[str]":
return s.str.center(width, fillchar)
return self._data.pandas_on_spark.transform_batch(pandas_center)
def contains(
self, pat: str, case: bool = True, flags: int = 0, na: Any = None, regex: bool = True
) -> "ps.Series":
"""
Test if pattern or regex is contained within a string of a Series.
Return boolean Series based on whether a given pattern or regex is
contained within a string of a Series.
Analogous to :func:`match`, but less strict, relying on
:func:`re.search` instead of :func:`re.match`.
Parameters
----------
pat : str
Character sequence or regular expression.
case : bool, default True
If True, case sensitive.
flags : int, default 0 (no flags)
Flags to pass through to the re module, e.g. re.IGNORECASE.
na : default None
Fill value for missing values. NaN converted to None.
regex : bool, default True
If True, assumes the pat is a regular expression.
If False, treats the pat as a literal string.
Returns
-------
Series of boolean values or object
A Series of boolean values indicating whether the given pattern is
contained within the string of each element of the Series.
Examples
--------
Returning a Series of booleans using only a literal pattern.
>>> s1 = ps.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
>>> s1.str.contains('og', regex=False)
0 False
1 True
2 False
3 False
4 None
dtype: object
Specifying case sensitivity using case.
>>> s1.str.contains('oG', case=True, regex=True)
0 False
1 False
2 False
3 False
4 None
dtype: object
Specifying na to be False instead of NaN replaces NaN values with
False. If Series does not contain NaN values the resultant dtype will
be bool, otherwise, an object dtype.
>>> s1.str.contains('og', na=False, regex=True)
0 False
1 True
2 False
3 False
4 False
dtype: bool
Returning ‘house’ or ‘dog’ when either expression occurs in a string.
>>> s1.str.contains('house|dog', regex=True)
0 False
1 True
2 True
3 False
4 None
dtype: object
Ignoring case sensitivity using flags with regex.
>>> import re
>>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True)
0 False
1 False
2 True
3 False
4 None
dtype: object
Returning any digit using regular expression.
>>> s1.str.contains('[0-9]', regex=True)
0 False
1 False
2 False
3 True
4 None
dtype: object
Ensure pat is not a literal pattern when regex is set to True.
Note in the following example one might expect only s2[1] and s2[3]
to return True. However, ‘.0’ as a regex matches any character followed
by a 0.
>>> s2 = ps.Series(['40','40.0','41','41.0','35'])
>>> s2.str.contains('.0', regex=True)
0 True
1 True
2 False
3 True
4 False
dtype: bool
"""
@no_type_check
def pandas_contains(s) -> "ps.Series[bool]":
return s.str.contains(pat, case, flags, na, regex)
return self._data.pandas_on_spark.transform_batch(pandas_contains)
def count(self, pat: str, flags: int = 0) -> "ps.Series":
"""
Count occurrences of pattern in each string of the Series.
This function is used to count the number of times a particular regex
pattern is repeated in each of the string elements of the Series.
Parameters
----------
pat : str
Valid regular expression.
flags : int, default 0 (no flags)
Flags for the re module.
Returns
-------
Series of int
A Series containing the integer counts of pattern matches.
Examples
--------
>>> s = ps.Series(['A', 'B', 'Aaba', 'Baca', np.NaN, 'CABA', 'cat'])
>>> s.str.count('a')
0 0.0
1 0.0
2 2.0
3 2.0
4 NaN
5 0.0
6 1.0
dtype: float64
Escape '$' to find the literal dollar sign.
>>> s = ps.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat'])
>>> s.str.count('\\$')
0 1
1 0
2 1
3 2
4 2
5 0
dtype: int64
"""
@no_type_check
def pandas_count(s) -> "ps.Series[int]":
return s.str.count(pat, flags)
return self._data.pandas_on_spark.transform_batch(pandas_count)
@no_type_check
def decode(self, encoding, errors="strict") -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
@no_type_check
def encode(self, encoding, errors="strict") -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
@no_type_check
def extract(self, pat, flags=0, expand=True) -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
@no_type_check
def extractall(self, pat, flags=0) -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
"""
Return the lowest index in each string in the Series where the
substring is fully contained between [start:end].
Return -1 on failure. Equivalent to standard :func:`str.find`.
Parameters
----------
sub : str
Substring being searched.
start : int
Left edge index.
end : int
Right edge index.
Returns
-------
Series of int
Series of lowest matching indexes.
Examples
--------
>>> s = ps.Series(['apple', 'oranges', 'bananas'])
>>> s.str.find('a')
0 0
1 2
2 1
dtype: int64
>>> s.str.find('a', start=2)
0 -1
1 2
2 3
dtype: int64
>>> s.str.find('a', end=1)
0 0
1 -1
2 -1
dtype: int64
>>> s.str.find('a', start=2, end=2)
0 -1
1 -1
2 -1
dtype: int64
"""
@no_type_check
def pandas_find(s) -> "ps.Series[int]":
return s.str.find(sub, start, end)
return self._data.pandas_on_spark.transform_batch(pandas_find)
def findall(self, pat: str, flags: int = 0) -> "ps.Series":
"""
Find all occurrences of pattern or regular expression in the Series.
Equivalent to applying :func:`re.findall` to all the elements in
the Series.
Parameters
----------
pat : str
Pattern or regular expression.
flags : int, default 0 (no flags)
`re` module flags, e.g. `re.IGNORECASE`.
Returns
-------
Series of object
All non-overlapping matches of pattern or regular expression in
each string of this Series.
Examples
--------
>>> s = ps.Series(['Lion', 'Monkey', 'Rabbit'])
The search for the pattern ‘Monkey’ returns one match:
>>> s.str.findall('Monkey')
0 []
1 [Monkey]
2 []
dtype: object
On the other hand, the search for the pattern ‘MONKEY’ doesn’t return
any match:
>>> s.str.findall('MONKEY')
0 []
1 []
2 []
dtype: object
Flags can be added to the pattern or regular expression. For instance,
to find the pattern ‘MONKEY’ ignoring the case:
>>> import re
>>> s.str.findall('MONKEY', flags=re.IGNORECASE)
0 []
1 [Monkey]
2 []
dtype: object
When the pattern matches more than one string in the Series, all
matches are returned:
>>> s.str.findall('on')
0 [on]
1 [on]
2 []
dtype: object
Regular expressions are supported too. For instance, the search for all
the strings ending with the word ‘on’ is shown next:
>>> s.str.findall('on$')
0 [on]
1 []
2 []
dtype: object
If the pattern is found more than once in the same string, then a list
of multiple strings is returned:
>>> s.str.findall('b')
0 []
1 []
2 [b, b]
dtype: object
"""
# Type hints cannot yet express the array element type, so pass an
# explicit ArrayType to pandas_udf instead of using transform_batch.
@pandas_udf(returnType=ArrayType(StringType(), containsNull=True)) # type: ignore
def pudf(s: pd.Series) -> pd.Series:
return s.str.findall(pat, flags)
return self._data._with_new_scol(scol=pudf(self._data.spark.column))
def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
"""
Return the lowest index in each string where the substring is fully
contained between [start:end].
This is the same as :func:`str.find` except instead of returning -1,
it raises a ValueError when the substring is not found. Equivalent to
standard :func:`str.index`.
Parameters
----------
sub : str
Substring being searched.
start : int
Left edge index.
end : int
Right edge index.
Returns
-------
Series of int
Series of lowest matching indexes.
Examples
--------
>>> s = ps.Series(['apple', 'oranges', 'bananas'])
>>> s.str.index('a')
0 0
1 2
2 1
dtype: int64
The following expression throws an exception:
>>> s.str.index('a', start=2) # doctest: +SKIP
"""
@no_type_check
def pandas_index(s) -> "ps.Series[np.int64]":
return s.str.index(sub, start, end)
return self._data.pandas_on_spark.transform_batch(pandas_index)
def join(self, sep: str) -> "ps.Series":
"""
Join lists contained as elements in the Series with passed delimiter.
If the elements of a Series are lists themselves, join the content of
these lists using the delimiter passed to the function. This function
is an equivalent to calling :func:`str.join` on the lists.
Parameters
----------
sep : str
Delimiter to use between list entries.
Returns
-------
Series of object
Series with list entries concatenated by intervening occurrences of
the delimiter.
See Also
--------
str.split : Split strings around given separator/delimiter.
str.rsplit : Splits string around given separator/delimiter,
starting from the right.
Examples
--------
Example with a list that contains a None element.
>>> s = ps.Series([['lion', 'elephant', 'zebra'],
... ['cat', None, 'dog']])
>>> s
0 [lion, elephant, zebra]
1 [cat, None, dog]
dtype: object
Join all lists using a ‘-’. The list containing None will produce None.
>>> s.str.join('-')
0 lion-elephant-zebra
1 None
dtype: object
"""
@no_type_check
def pandas_join(s) -> "ps.Series[str]":
return s.str.join(sep)
return self._data.pandas_on_spark.transform_batch(pandas_join)
def len(self) -> "ps.Series":
"""
Computes the length of each element in the Series.
The element may be a sequence (such as a string, tuple or list).
Returns
-------
Series of int
A Series of integer values indicating the length of each element in
the Series.
Examples
--------
Returns the length (number of characters) in a string. Returns the
number of entries for lists or tuples.
>>> s1 = ps.Series(['dog', 'monkey'])
>>> s1.str.len()
0 3
1 6
dtype: int64
>>> s2 = ps.Series([["a", "b", "c"], []])
>>> s2.str.len()
0 3
1 0
dtype: int64
"""
if isinstance(self._data.spark.data_type, (ArrayType, MapType)):
return self._data.spark.transform(lambda c: F.size(c).cast(LongType()))
else:
return self._data.spark.transform(lambda c: F.length(c).cast(LongType()))
def ljust(self, width: int, fillchar: str = " ") -> "ps.Series":
"""
Filling right side of strings in the Series with an additional
character. Equivalent to :func:`str.ljust`.
Parameters
----------
width : int
Minimum width of resulting string; additional characters will be
filled with `fillchar`.
fillchar : str
Additional character for filling, default is whitespace.
Returns
-------
Series of object
Examples
--------
>>> s = ps.Series(["caribou", "tiger"])
>>> s
0 caribou
1 tiger
dtype: object
>>> s.str.ljust(width=10, fillchar='-')
0 caribou---
1 tiger-----
dtype: object
"""
@no_type_check
def pandas_ljust(s) -> "ps.Series[str]":
return s.str.ljust(width, fillchar)
return self._data.pandas_on_spark.transform_batch(pandas_ljust)
def match(self, pat: str, case: bool = True, flags: int = 0, na: Any = np.NaN) -> "ps.Series":
"""
Determine if each string matches a regular expression.
Analogous to :func:`contains`, but more strict, relying on
:func:`re.match` instead of :func:`re.search`.
Parameters
----------
pat : str
Character sequence or regular expression.
case : bool, default True
If True, case sensitive.
flags : int, default 0 (no flags)
Flags to pass through to the re module, e.g. re.IGNORECASE.
na : default NaN
Fill value for missing values.
Returns
-------
Series of boolean values or object
A Series of boolean values indicating whether the given pattern can
be matched in the string of each element of the Series.
Examples
--------
>>> s = ps.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
>>> s.str.match('dog')
0 False
1 True
2 False
3 False
4 None
dtype: object
>>> s.str.match('mouse|dog', case=False)
0 True
1 True
2 False
3 False
4 None
dtype: object
>>> s.str.match('.+and.+', na=True)
0 False
1 False
2 True
3 False
4 True
dtype: bool
>>> import re
>>> s.str.match('MOUSE', flags=re.IGNORECASE)
0 True
1 False
2 False
3 False
4 None
dtype: object
"""
@no_type_check
def pandas_match(s) -> "ps.Series[bool]":
return s.str.match(pat, case, flags, na)
return self._data.pandas_on_spark.transform_batch(pandas_match)
def normalize(self, form: str) -> "ps.Series":
"""
Return the Unicode normal form for the strings in the Series.
For more information on the forms, see
:func:`unicodedata.normalize`.
Parameters
----------
form : {‘NFC’, ‘NFKC’, ‘NFD’, ‘NFKD’}
Unicode form.
Returns
-------
Series of objects
A Series of normalized strings.
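Examples
--------
A minimal sketch, skipped under doctest; the sample string is an
illustrative assumption. NFC composes 'e' plus a combining acute
accent into the single code point 'é'.
>>> s = ps.Series(['e\\u0301'])  # doctest: +SKIP
>>> s.str.normalize('NFC')  # doctest: +SKIP
0 é
dtype: object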
"""
@no_type_check
def pandas_normalize(s) -> "ps.Series[str]":
return s.str.normalize(form)
return self._data.pandas_on_spark.transform_batch(pandas_normalize)
def pad(self, width: int, side: str = "left", fillchar: str = " ") -> "ps.Series":
"""
Pad strings in the Series up to width.
Parameters
----------
width : int
Minimum width of resulting string; additional characters will be
filled with character defined in `fillchar`.
side : {‘left’, ‘right’, ‘both’}, default ‘left’
Side from which to fill resulting string.
fillchar : str, default ' '
Additional character for filling, default is whitespace.
Returns
-------
Series of object
Returns Series with each string padded up to the minimum width.
Examples
--------
>>> s = ps.Series(["caribou", "tiger"])
>>> s
0 caribou
1 tiger
dtype: object
>>> s.str.pad(width=10)
0 caribou
1 tiger
dtype: object
>>> s.str.pad(width=10, side='right', fillchar='-')
0 caribou---
1 tiger-----
dtype: object
>>> s.str.pad(width=10, side='both', fillchar='-')
0 -caribou--
1 --tiger---
dtype: object
"""
@no_type_check
def pandas_pad(s) -> "ps.Series[str]":
return s.str.pad(width, side, fillchar)
return self._data.pandas_on_spark.transform_batch(pandas_pad)
def partition(self, sep: str = " ", expand: bool = True) -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
def repeat(self, repeats: int) -> "ps.Series":
"""
Duplicate each string in the Series.
Parameters
----------
repeats : int
Repeat the string the given number of times (int). A sequence of
ints is not supported.
Returns
-------
Series of object
Series of repeated string objects as specified by the input
parameter repeats.
Examples
--------
>>> s = ps.Series(['a', 'b', 'c'])
>>> s
0 a
1 b
2 c
dtype: object
A single int repeats each string in the Series.
>>> s.str.repeat(repeats=2)
0 aa
1 bb
2 cc
dtype: object
"""
if not isinstance(repeats, int):
raise TypeError("repeats expects an int parameter")
return self._data.spark.transform(lambda c: SF.repeat(col=c, n=repeats))
def replace(
self,
pat: str,
repl: Union[str, Callable[[str], str]],
n: int = -1,
case: Optional[bool] = None,
flags: int = 0,
regex: bool = True,
) -> "ps.Series":
"""
Replace occurrences of pattern/regex in the Series with some other
string. Equivalent to :func:`str.replace` or :func:`re.sub`.
Parameters
----------
pat : str or compiled regex
String can be a character sequence or regular expression.
repl : str or callable
Replacement string or a callable. The callable is passed the regex
match object and must return a replacement string to be used. See
:func:`re.sub`.
n : int, default -1 (all)
Number of replacements to make from start.
case : boolean, default None
If True, case sensitive (the default if pat is a string).
Set to False for case insensitive.
Cannot be set if pat is a compiled regex.
flags: int, default 0 (no flags)
re module flags, e.g. re.IGNORECASE.
Cannot be set if pat is a compiled regex.
regex : boolean, default True
If True, assumes the passed-in pattern is a regular expression.
If False, treats the pattern as a literal string.
Cannot be set to False if pat is a compiled regex or repl is a
callable.
Returns
-------
Series of object
A copy of the string with all matching occurrences of pat replaced
by repl.
Examples
--------
When pat is a string and regex is True (the default), the given pat is
compiled as a regex. When repl is a string, it replaces matching regex
patterns as with :func:`re.sub`. NaN value(s) in the Series are changed
to None:
>>> ps.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
0 bao
1 baz
2 None
dtype: object
When pat is a string and regex is False, every pat is replaced with
repl as with :func:`str.replace`:
>>> ps.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
0 bao
1 fuz
2 None
dtype: object
When repl is a callable, it is called on every pat using
:func:`re.sub`. The callable should expect one positional argument (a
regex match object) and return a string.
Reverse every lowercase alphabetic word:
>>> repl = lambda m: m.group(0)[::-1]
>>> ps.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)
0 oof 123
1 rab zab
2 None
dtype: object
Using regex groups (extract second group and swap case):
>>> pat = r"(?P<one>\\w+) (?P<two>\\w+) (?P<three>\\w+)"
>>> repl = lambda m: m.group('two').swapcase()
>>> ps.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)
0 tWO
1 bAR
dtype: object
Using a compiled regex with flags:
>>> import re
>>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
>>> ps.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')
0 foo
1 bar
2 None
dtype: object
"""
@no_type_check
def pandas_replace(s) -> "ps.Series[str]":
return s.str.replace(pat, repl, n=n, case=case, flags=flags, regex=regex)
return self._data.pandas_on_spark.transform_batch(pandas_replace)
def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
"""
Return the highest index in each string in the Series where the
substring is fully contained between [start:end].
Return -1 on failure. Equivalent to standard :func:`str.rfind`.
Parameters
----------
sub : str
Substring being searched.
start : int
Left edge index.
end : int
Right edge index.
Returns
-------
Series of int
Series of highest matching indexes.
Examples
--------
>>> s = ps.Series(['apple', 'oranges', 'bananas'])
>>> s.str.rfind('a')
0 0
1 2
2 5
dtype: int64
>>> s.str.rfind('a', start=2)
0 -1
1 2
2 5
dtype: int64
>>> s.str.rfind('a', end=1)
0 0
1 -1
2 -1
dtype: int64
>>> s.str.rfind('a', start=2, end=2)
0 -1
1 -1
2 -1
dtype: int64
"""
@no_type_check
def pandas_rfind(s) -> "ps.Series[int]":
return s.str.rfind(sub, start, end)
return self._data.pandas_on_spark.transform_batch(pandas_rfind)
def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
"""
Return the highest index in each string where the substring is fully
contained between [start:end].
This is the same as :func:`str.rfind` except instead of returning -1,
it raises a ValueError when the substring is not found. Equivalent to
standard :func:`str.rindex`.
Parameters
----------
sub : str
Substring being searched.
start : int
Left edge index.
end : int
Right edge index.
Returns
-------
Series of int
Series of highest matching indexes.
Examples
--------
>>> s = ps.Series(['apple', 'oranges', 'bananas'])
>>> s.str.rindex('a')
0 0
1 2
2 5
dtype: int64
The following expression throws an exception:
>>> s.str.rindex('a', start=2) # doctest: +SKIP
"""
@no_type_check
def pandas_rindex(s) -> "ps.Series[np.int64]":
return s.str.rindex(sub, start, end)
return self._data.pandas_on_spark.transform_batch(pandas_rindex)
def rjust(self, width: int, fillchar: str = " ") -> "ps.Series":
"""
Filling left side of strings in the Series with an additional
character. Equivalent to :func:`str.rjust`.
Parameters
----------
width : int
Minimum width of resulting string; additional characters will be
filled with `fillchar`.
fillchar : str
Additional character for filling, default is whitespace.
Returns
-------
Series of object
Examples
--------
>>> s = ps.Series(["caribou", "tiger"])
>>> s
0 caribou
1 tiger
dtype: object
>>> s.str.rjust(width=10)
0 caribou
1 tiger
dtype: object
>>> s.str.rjust(width=10, fillchar='-')
0 ---caribou
1 -----tiger
dtype: object
"""
@no_type_check
def pandas_rjust(s) -> "ps.Series[str]":
return s.str.rjust(width, fillchar)
return self._data.pandas_on_spark.transform_batch(pandas_rjust)
def rpartition(self, sep: str = " ", expand: bool = True) -> "ps.Series":
"""
Not supported.
"""
raise NotImplementedError()
def slice(
self, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None
) -> "ps.Series":
"""
Slice substrings from each element in the Series.
Parameters
----------
start : int, optional
Start position for slice operation.
stop : int, optional
Stop position for slice operation.
step : int, optional
Step size for slice operation.
Returns
-------
Series of object
Series of substrings sliced from the original string objects.
Examples
--------
>>> s = ps.Series(["koala", "fox", "chameleon"])
>>> s
0 koala
1 fox
2 chameleon
dtype: object
>>> s.str.slice(start=1)
0 oala
1 ox
2 hameleon
dtype: object
>>> s.str.slice(stop=2)
0 ko
1 fo
2 ch
dtype: object
>>> s.str.slice(step=2)
0 kaa
1 fx
2 caeen
dtype: object
>>> s.str.slice(start=0, stop=5, step=3)
0 kl
1 f
2 cm
dtype: object
"""
@no_type_check
def pandas_slice(s) -> "ps.Series[str]":
return s.str.slice(start, stop, step)
return self._data.pandas_on_spark.transform_batch(pandas_slice)
def slice_replace(
self, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
) -> "ps.Series":
"""
Replace a positional slice of each string in the Series with repl.
Parameters
----------
start : int, optional
Start position for slice operation. If not specified (None), the
slice is unbounded on the left, i.e. slice from the start of the
string.
stop : int, optional
Stop position for slice operation. If not specified (None), the
slice is unbounded on the right, i.e. slice until the end of the
string.
repl : str, optional
String for replacement. If not specified (None), the sliced region
is replaced with an empty string.
Returns
-------
Series of object
Series with the specified slice replaced in each string.
Examples
--------
>>> s = ps.Series(['a', 'ab', 'abc', 'abdc', 'abcde'])
>>> s
0 a
1 ab
2 abc
3 abdc
4 abcde
dtype: object
Specify just start, meaning replace start until the end of the string
with repl.
>>> s.str.slice_replace(1, repl='X')
0 aX
1 aX
2 aX
3 aX
4 aX
dtype: object
Specify just stop, meaning the start of the string to stop is replaced
with repl, and the rest of the string is included.
>>> s.str.slice_replace(stop=2, repl='X')
0 X
1 X
2 Xc
3 Xdc
4 Xcde
dtype: object
Specify start and stop, meaning the slice from start to stop is
replaced with repl. Everything before or after start and stop is
included as is.
>>> s.str.slice_replace(start=1, stop=3, repl='X')
0 aX
1 aX
2 aX
3 aXc
4 aXde
dtype: object
"""
@no_type_check
def pandas_slice_replace(s) -> "ps.Series[str]":
return s.str.slice_replace(start, stop, repl)
return self._data.pandas_on_spark.transform_batch(pandas_slice_replace)
def split(
self, pat: Optional[str] = None, n: int = -1, expand: bool = False
) -> Union["ps.Series", "ps.DataFrame"]:
"""
Split strings around given separator/delimiter.
Splits the string in the Series from the beginning, at the specified
delimiter string. Equivalent to :func:`str.split`.
Parameters
----------
pat : str, optional
String or regular expression to split on. If not specified, split
on whitespace.
n : int, default -1 (all)
Limit number of splits in output. None, 0 and -1 will be
interpreted as return all splits.
expand : bool, default False
Expand the split strings into separate columns.
* If ``True``, `n` must be a positive integer, and return DataFrame expanding
dimensionality.
* If ``False``, return Series, containing lists of strings.
Returns
-------
Series, DataFrame
Type matches caller unless `expand=True` (see Notes).
See Also
--------
str.rsplit : Splits string around given separator/delimiter,
starting from the right.
str.join : Join lists contained as elements in the Series/Index
with passed delimiter.
Notes
-----
The handling of the `n` keyword depends on the number of found splits:
- If found splits > `n`, make first `n` splits only
- If found splits <= `n`, make all splits
- If for a certain row the number of found splits < `n`,
append `None` for padding up to `n` if ``expand=True``
If using ``expand=True``, Series callers return DataFrame objects with `n + 1` columns.
.. note:: Even if `n` is much larger than found splits, the number of columns does NOT
shrink unlike pandas.
Examples
--------
>>> s = ps.Series(["this is a regular sentence",
... "https://docs.python.org/3/tutorial/index.html",
... np.nan])
In the default setting, the string is split by whitespace.
>>> s.str.split()
0 [this, is, a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
Without the n parameter, the outputs of rsplit and split are identical.
>>> s.str.rsplit()
0 [this, is, a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
The n parameter can be used to limit the number of splits on the
delimiter. The outputs of split and rsplit are different.
>>> s.str.split(n=2)
0 [this, is, a regular sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
>>> s.str.rsplit(n=2)
0 [this is a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
The pat parameter can be used to split by other characters.
>>> s.str.split(pat = "/")
0 [this is a regular sentence]
1 [https:, , docs.python.org, 3, tutorial, index...
2 None
dtype: object
When using ``expand=True``, the split elements will expand out into
separate columns. If NaN is present, it is propagated throughout
the columns during the split.
>>> s.str.split(n=4, expand=True)
0 1 2 3 4
0 this is a regular sentence
1 https://docs.python.org/3/tutorial/index.html None None None None
2 None None None None None
For slightly more complex use cases like splitting the HTML document name
from a URL, a combination of parameter settings can be used.
>>> s.str.rsplit("/", n=1, expand=True)
0 1
0 this is a regular sentence None
1 https://docs.python.org/3/tutorial index.html
2 None None
Remember to escape special characters when explicitly using regular
expressions.
>>> s = ps.Series(["1+1=2"])
>>> s.str.split(r"\\+|=", n=2, expand=True)
0 1 2
0 1 1 2
"""
from pyspark.pandas.frame import DataFrame
if expand and n <= 0:
raise NotImplementedError("expand=True is currently only supported with n > 0.")
# Type hints cannot yet express the array element type, so pass an
# explicit ArrayType to pandas_udf instead of using transform_batch.
return_type = ArrayType(StringType(), containsNull=True)
@pandas_udf(returnType=return_type) # type: ignore
def pudf(s: pd.Series) -> pd.Series:
return s.str.split(pat, n)
psser = self._data._with_new_scol(
pudf(self._data.spark.column).alias(self._data._internal.data_spark_column_names[0]),
field=self._data._internal.data_fields[0].copy(spark_type=return_type, nullable=True),
)
if expand:
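# Indexing the array column positionally: positions past the end of a
# row's array yield null, so the frame always has exactly n + 1 columns
# no matter how many splits each row produced.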
psdf = psser.to_frame()
scol = psdf._internal.data_spark_columns[0]
spark_columns = [scol[i].alias(str(i)) for i in range(n + 1)]
column_labels = [(i,) for i in range(n + 1)]
internal = psdf._internal.with_new_columns(
spark_columns,
column_labels=cast(Optional[List], column_labels),
data_fields=[
self._data._internal.data_fields[0].copy(name=str(i), nullable=True)
for i in range(n + 1)
],
)
return DataFrame(internal)
else:
return psser
def rsplit(
self, pat: Optional[str] = None, n: int = -1, expand: bool = False
) -> Union["ps.Series", "ps.DataFrame"]:
"""
Split strings around given separator/delimiter.
Splits the string in the Series from the end, at the specified
delimiter string. Equivalent to :func:`str.rsplit`.
Parameters
----------
pat : str, optional
String or regular expression to split on. If not specified, split
on whitespace.
n : int, default -1 (all)
Limit number of splits in output. None, 0 and -1 will be
interpreted as return all splits.
expand : bool, default False
Expand the split strings into separate columns.
* If ``True``, `n` must be a positive integer, and return DataFrame expanding
dimensionality.
* If ``False``, return Series, containing lists of strings.
Returns
-------
Series, DataFrame
Type matches caller unless `expand=True` (see Notes).
See Also
--------
str.split : Split strings around given separator/delimiter.
str.join : Join lists contained as elements in the Series/Index
with passed delimiter.
Notes
-----
The handling of the `n` keyword depends on the number of found splits:
- If found splits > `n`, make first `n` splits only
- If found splits <= `n`, make all splits
- If for a certain row the number of found splits < `n`,
append `None` for padding up to `n` if ``expand=True``
If using ``expand=True``, Series callers return DataFrame objects with `n + 1` columns.
.. note:: Even if `n` is much larger than found splits, the number of columns does NOT
shrink unlike pandas.
Examples
--------
>>> s = ps.Series(["this is a regular sentence",
... "https://docs.python.org/3/tutorial/index.html",
... np.nan])
In the default setting, the string is split by whitespace.
>>> s.str.split()
0 [this, is, a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
Without the n parameter, the outputs of rsplit and split are identical.
>>> s.str.rsplit()
0 [this, is, a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
The n parameter can be used to limit the number of splits on the
delimiter. The outputs of split and rsplit are different.
>>> s.str.split(n=2)
0 [this, is, a regular sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
>>> s.str.rsplit(n=2)
0 [this is a, regular, sentence]
1 [https://docs.python.org/3/tutorial/index.html]
2 None
dtype: object
When using ``expand=True``, the split elements will expand out into
separate columns. If NaN is present, it is propagated throughout
the columns during the split.
>>> s.str.split(n=4, expand=True)
0 1 2 3 4
0 this is a regular sentence
1 https://docs.python.org/3/tutorial/index.html None None None None
2 None None None None None
For slightly more complex use cases like splitting the HTML document name
from a URL, a combination of parameter settings can be used.
>>> s.str.rsplit("/", n=1, expand=True)
0 1
0 this is a regular sentence None
1 https://docs.python.org/3/tutorial index.html
2 None None
Remember to escape special characters when explicitly using regular
expressions.
>>> s = ps.Series(["1+1=2"])
>>> s.str.split(r"\\+|=", n=2, expand=True)
0 1 2
0 1 1 2
"""
from pyspark.pandas.frame import DataFrame
if expand and n <= 0:
raise NotImplementedError("expand=True is currently only supported with n > 0.")
# Type hints cannot yet express the array element type, so pass an
# explicit ArrayType to pandas_udf instead of using transform_batch.
return_type = ArrayType(StringType(), containsNull=True)
@pandas_udf(returnType=return_type) # type: ignore
def pudf(s: pd.Series) -> pd.Series:
return s.str.rsplit(pat, n)
psser = self._data._with_new_scol(
pudf(self._data.spark.column).alias(self._data._internal.data_spark_column_names[0]),
field=self._data._internal.data_fields[0].copy(spark_type=return_type, nullable=True),
)
if expand:
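# As in split above, positional indexing past a row's array length yields
# null, keeping the expanded frame at exactly n + 1 columns.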
psdf = psser.to_frame()
scol = psdf._internal.data_spark_columns[0]
spark_columns = [scol[i].alias(str(i)) for i in range(n + 1)]
column_labels = [(i,) for i in range(n + 1)]
internal = psdf._internal.with_new_columns(
spark_columns,
column_labels=cast(Optional[List], column_labels),
data_fields=[
self._data._internal.data_fields[0].copy(name=str(i), nullable=True)
for i in range(n + 1)
],
)
return DataFrame(internal)
else:
return psser
def translate(self, table: Dict) -> "ps.Series":
"""
Map all characters in the string through the given mapping table.
Equivalent to standard :func:`str.translate`.
Parameters
----------
table : dict
Table is a mapping of Unicode ordinals to Unicode ordinals,
strings, or None. Unmapped characters are left untouched.
Characters mapped to None are deleted. :func:`str.maketrans` is a
helper function for making translation tables.
Returns
-------
Series of object
Series with translated strings.
Examples
--------
>>> s = ps.Series(["dog", "cat", "bird"])
>>> m = str.maketrans({'a': 'X', 'i': 'Y', 'o': None})
>>> s.str.translate(m)
0 dg
1 cXt
2 bYrd
dtype: object
"""
@no_type_check
def pandas_translate(s) -> "ps.Series[str]":
return s.str.translate(table)
return self._data.pandas_on_spark.transform_batch(pandas_translate)
def wrap(self, width: int, **kwargs: bool) -> "ps.Series":
"""
Wrap long strings in the Series to be formatted in paragraphs with
length less than a given width.
This method has the same keyword parameters and defaults as
:class:`textwrap.TextWrapper`.
Parameters
----------
width : int
Maximum line width. Lines are separated with the newline character.
expand_tabs : bool, optional
If true, tab characters will be expanded to spaces (default: True).
replace_whitespace : bool, optional
If true, each whitespace character remaining after tab expansion
will be replaced by a single space (default: True).
drop_whitespace : bool, optional
If true, whitespace that, after wrapping, happens to end up at the
beginning or end of a line is dropped (default: True).
break_long_words : bool, optional
If true, then words longer than width will be broken in order to
ensure that no lines are longer than width. If it is false, long
words will not be broken, and some lines may be longer than width
(default: True).
break_on_hyphens : bool, optional
If true, wrapping will occur preferably on whitespace and right
after hyphens in compound words, as it is customary in English.
If false, only whitespaces will be considered as potentially good
places for line breaks, but you need to set break_long_words to
false if you want truly insecable words (default: True).
Returns
-------
Series of object
Series with wrapped strings.
Examples
--------
>>> s = ps.Series(['line to be wrapped', 'another line to be wrapped'])
>>> s.str.wrap(12)
0 line to be\\nwrapped
1 another line\\nto be\\nwrapped
dtype: object
"""
@no_type_check
def pandas_wrap(s) -> "ps.Series[str]":
return s.str.wrap(width, **kwargs)
return self._data.pandas_on_spark.transform_batch(pandas_wrap)
def zfill(self, width: int) -> "ps.Series":
"""
Pad strings in the Series by prepending ‘0’ characters.
Strings in the Series are padded with ‘0’ characters on the left of the
string to reach a total string length width. Strings in the Series with
length greater or equal to width are unchanged.
Differs from :func:`str.zfill` which has special handling for ‘+’/‘-’
in the string.
Parameters
----------
width : int
Minimum length of resulting string; strings with length less than
width will be prepended with ‘0’ characters.
Returns
-------
Series of object
Series with '0' left-padded strings.
Examples
--------
>>> s = ps.Series(['-1', '1', '1000', np.nan])
>>> s
0 -1
1 1
2 1000
3 None
dtype: object
Note that NaN is not a string, therefore it is shown as None and left
unchanged. The minus sign in '-1' is treated as a regular character and
the zero is added to the left of it (:func:`str.zfill` would have moved
it to the left). 1000 remains unchanged as it is longer than width.
>>> s.str.zfill(3)
0 0-1
1 001
2 1000
3 None
dtype: object
"""
@no_type_check
def pandas_zfill(s) -> "ps.Series[str]":
return s.str.zfill(width)
return self._data.pandas_on_spark.transform_batch(pandas_zfill)
@no_type_check
def get_dummies(self, sep: str = "|") -> "ps.DataFrame":
"""
Not supported.
"""
raise NotImplementedError()
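# Doctest harness: running this module directly executes the doctest
# examples above. It assumes SPARK_HOME is set, since _test() changes the
# working directory into it before starting a local Spark session.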
def _test() -> None:
import os
import doctest
import sys
from pyspark.sql import SparkSession
import pyspark.pandas.strings
os.chdir(os.environ["SPARK_HOME"])
globs = pyspark.pandas.strings.__dict__.copy()
globs["ps"] = pyspark.pandas
spark = (
SparkSession.builder.master("local[4]")
.appName("pyspark.pandas.strings tests")
.getOrCreate()
)
(failure_count, test_count) = doctest.testmod(
pyspark.pandas.strings,
globs=globs,
optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
)
spark.stop()
if failure_count:
sys.exit(-1)
if __name__ == "__main__":
_test()