blob: 26a92da0f8676aa452242f52a7537eb9fde3eb68 [file] [log] [blame]
datafusion.functions
====================
.. py:module:: datafusion.functions
.. autoapi-nested-parse::
User functions for operating on :py:class:`~datafusion.expr.Expr`.
Functions
---------
.. autoapisummary::
datafusion.functions.abs
datafusion.functions.acos
datafusion.functions.acosh
datafusion.functions.alias
datafusion.functions.approx_distinct
datafusion.functions.approx_median
datafusion.functions.approx_percentile_cont
datafusion.functions.approx_percentile_cont_with_weight
datafusion.functions.array
datafusion.functions.array_agg
datafusion.functions.array_append
datafusion.functions.array_cat
datafusion.functions.array_concat
datafusion.functions.array_dims
datafusion.functions.array_distinct
datafusion.functions.array_element
datafusion.functions.array_empty
datafusion.functions.array_except
datafusion.functions.array_extract
datafusion.functions.array_has
datafusion.functions.array_has_all
datafusion.functions.array_has_any
datafusion.functions.array_indexof
datafusion.functions.array_intersect
datafusion.functions.array_join
datafusion.functions.array_length
datafusion.functions.array_ndims
datafusion.functions.array_pop_back
datafusion.functions.array_pop_front
datafusion.functions.array_position
datafusion.functions.array_positions
datafusion.functions.array_prepend
datafusion.functions.array_push_back
datafusion.functions.array_push_front
datafusion.functions.array_remove
datafusion.functions.array_remove_all
datafusion.functions.array_remove_n
datafusion.functions.array_repeat
datafusion.functions.array_replace
datafusion.functions.array_replace_all
datafusion.functions.array_replace_n
datafusion.functions.array_resize
datafusion.functions.array_slice
datafusion.functions.array_sort
datafusion.functions.array_to_string
datafusion.functions.array_union
datafusion.functions.arrow_cast
datafusion.functions.arrow_typeof
datafusion.functions.ascii
datafusion.functions.asin
datafusion.functions.asinh
datafusion.functions.atan
datafusion.functions.atan2
datafusion.functions.atanh
datafusion.functions.avg
datafusion.functions.bit_and
datafusion.functions.bit_length
datafusion.functions.bit_or
datafusion.functions.bit_xor
datafusion.functions.bool_and
datafusion.functions.bool_or
datafusion.functions.btrim
datafusion.functions.cardinality
datafusion.functions.case
datafusion.functions.cbrt
datafusion.functions.ceil
datafusion.functions.char_length
datafusion.functions.character_length
datafusion.functions.chr
datafusion.functions.coalesce
datafusion.functions.col
datafusion.functions.concat
datafusion.functions.concat_ws
datafusion.functions.corr
datafusion.functions.cos
datafusion.functions.cosh
datafusion.functions.cot
datafusion.functions.count
datafusion.functions.count_star
datafusion.functions.covar
datafusion.functions.covar_pop
datafusion.functions.covar_samp
datafusion.functions.cume_dist
datafusion.functions.current_date
datafusion.functions.current_time
datafusion.functions.date_bin
datafusion.functions.date_part
datafusion.functions.date_trunc
datafusion.functions.datepart
datafusion.functions.datetrunc
datafusion.functions.decode
datafusion.functions.degrees
datafusion.functions.dense_rank
datafusion.functions.digest
datafusion.functions.empty
datafusion.functions.encode
datafusion.functions.ends_with
datafusion.functions.exp
datafusion.functions.extract
datafusion.functions.factorial
datafusion.functions.find_in_set
datafusion.functions.first_value
datafusion.functions.flatten
datafusion.functions.floor
datafusion.functions.from_unixtime
datafusion.functions.gcd
datafusion.functions.in_list
datafusion.functions.initcap
datafusion.functions.isnan
datafusion.functions.iszero
datafusion.functions.lag
datafusion.functions.last_value
datafusion.functions.lcm
datafusion.functions.lead
datafusion.functions.left
datafusion.functions.length
datafusion.functions.levenshtein
datafusion.functions.list_append
datafusion.functions.list_cat
datafusion.functions.list_concat
datafusion.functions.list_dims
datafusion.functions.list_distinct
datafusion.functions.list_element
datafusion.functions.list_except
datafusion.functions.list_extract
datafusion.functions.list_indexof
datafusion.functions.list_intersect
datafusion.functions.list_join
datafusion.functions.list_length
datafusion.functions.list_ndims
datafusion.functions.list_position
datafusion.functions.list_positions
datafusion.functions.list_prepend
datafusion.functions.list_push_back
datafusion.functions.list_push_front
datafusion.functions.list_remove
datafusion.functions.list_remove_all
datafusion.functions.list_remove_n
datafusion.functions.list_repeat
datafusion.functions.list_replace
datafusion.functions.list_replace_all
datafusion.functions.list_replace_n
datafusion.functions.list_resize
datafusion.functions.list_slice
datafusion.functions.list_sort
datafusion.functions.list_to_string
datafusion.functions.list_union
datafusion.functions.ln
datafusion.functions.log
datafusion.functions.log10
datafusion.functions.log2
datafusion.functions.lower
datafusion.functions.lpad
datafusion.functions.ltrim
datafusion.functions.make_array
datafusion.functions.make_date
datafusion.functions.make_list
datafusion.functions.max
datafusion.functions.md5
datafusion.functions.mean
datafusion.functions.median
datafusion.functions.min
datafusion.functions.named_struct
datafusion.functions.nanvl
datafusion.functions.now
datafusion.functions.nth_value
datafusion.functions.ntile
datafusion.functions.nullif
datafusion.functions.nvl
datafusion.functions.octet_length
datafusion.functions.order_by
datafusion.functions.overlay
datafusion.functions.percent_rank
datafusion.functions.pi
datafusion.functions.pow
datafusion.functions.power
datafusion.functions.radians
datafusion.functions.random
datafusion.functions.range
datafusion.functions.rank
datafusion.functions.regexp_count
datafusion.functions.regexp_like
datafusion.functions.regexp_match
datafusion.functions.regexp_replace
datafusion.functions.regr_avgx
datafusion.functions.regr_avgy
datafusion.functions.regr_count
datafusion.functions.regr_intercept
datafusion.functions.regr_r2
datafusion.functions.regr_slope
datafusion.functions.regr_sxx
datafusion.functions.regr_sxy
datafusion.functions.regr_syy
datafusion.functions.repeat
datafusion.functions.replace
datafusion.functions.reverse
datafusion.functions.right
datafusion.functions.round
datafusion.functions.row_number
datafusion.functions.rpad
datafusion.functions.rtrim
datafusion.functions.sha224
datafusion.functions.sha256
datafusion.functions.sha384
datafusion.functions.sha512
datafusion.functions.signum
datafusion.functions.sin
datafusion.functions.sinh
datafusion.functions.split_part
datafusion.functions.sqrt
datafusion.functions.starts_with
datafusion.functions.stddev
datafusion.functions.stddev_pop
datafusion.functions.stddev_samp
datafusion.functions.string_agg
datafusion.functions.strpos
datafusion.functions.struct
datafusion.functions.substr
datafusion.functions.substr_index
datafusion.functions.substring
datafusion.functions.sum
datafusion.functions.tan
datafusion.functions.tanh
datafusion.functions.to_hex
datafusion.functions.to_timestamp
datafusion.functions.to_timestamp_micros
datafusion.functions.to_timestamp_millis
datafusion.functions.to_timestamp_nanos
datafusion.functions.to_timestamp_seconds
datafusion.functions.to_unixtime
datafusion.functions.translate
datafusion.functions.trim
datafusion.functions.trunc
datafusion.functions.upper
datafusion.functions.uuid
datafusion.functions.var
datafusion.functions.var_pop
datafusion.functions.var_samp
datafusion.functions.var_sample
datafusion.functions.when
datafusion.functions.window
Module Contents
---------------
.. py:function:: abs(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Return the absolute value of a given number.
:returns: A new expression representing the absolute value of the input expression.
:rtype: Expr
.. py:function:: acos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the arc cosine or inverse cosine of a number.
:returns: A new expression representing the arc cosine of the input expression.
:rtype: Expr
.. py:function:: acosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns inverse hyperbolic cosine.
.. py:function:: alias(expr: datafusion.expr.Expr, name: str, metadata: Optional[dict[str, str]] = None) -> datafusion.expr.Expr
Creates an alias expression with an optional metadata dictionary.
:param expr: The expression to alias
:param name: The alias name
:param metadata: Optional metadata to attach to the column
:returns: An expression with the given alias
.. py:function:: approx_distinct(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the approximate number of distinct values.
This aggregate function is similar to :py:func:`count` with distinct set, but it
will approximate the number of distinct entries. It may return significantly faster
than :py:func:`count` for some DataFrames.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Values to check for distinct entries
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: approx_median(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the approximate median value.
This aggregate function is similar to :py:func:`median`, but it will only
approximate the median. It may return significantly faster for some DataFrames.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Values to find the median for
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: approx_percentile_cont(expression: datafusion.expr.Expr, percentile: float, num_centroids: Optional[int] = None, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the value that is approximately at a given percentile of ``expression``.
This aggregate function assumes the input values form a continuous distribution.
Suppose you have a DataFrame which consists of 100 different test scores. If you
called this function with a percentile of 0.9, it would return the value of the
test score that is above 90% of the other test scores. The returned value may be
between two of the values.
This function uses the `t-digest <https://arxiv.org/abs/1902.04023>`__ algorithm to
compute the percentile. You can limit the number of bins used in this algorithm by
setting the ``num_centroids`` parameter.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Values for which to find the approximate percentile
:param percentile: This must be between 0.0 and 1.0, inclusive
:param num_centroids: Max bin size for the t-digest algorithm
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: approx_percentile_cont_with_weight(expression: datafusion.expr.Expr, weight: datafusion.expr.Expr, percentile: float, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the value of the weighted approximate percentile.
This aggregate function is similar to :py:func:`approx_percentile_cont` except that
it uses the associated weights.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Values for which to find the approximate percentile
:param weight: Relative weight for each of the values in ``expression``
:param percentile: This must be between 0.0 and 1.0, inclusive
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array using the specified input expressions.
This is an alias for :py:func:`make_array`.
.. py:function:: array_agg(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Aggregate values into an array.
Currently ``distinct`` and ``order_by`` cannot be used together. As a workaround,
consider :py:func:`array_sort` after aggregation.
`Issue Tracker <https://github.com/apache/datafusion/issues/12371>`__
If using the builder functions described in :ref:`aggregation` this function ignores
the option ``null_treatment``.
:param expression: Values to combine into an array
:param distinct: If True, a single entry for each distinct value will be in the result
:param filter: If provided, only compute against rows for which the filter is True
:param order_by: Order the resultant array values
.. py:function:: array_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Appends an element to the end of an array.
.. py:function:: array_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the input arrays.
This is an alias for :py:func:`array_concat`.
.. py:function:: array_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the input arrays.
.. py:function:: array_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array of the array's dimensions.
.. py:function:: array_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns distinct values from the array after removing duplicates.
.. py:function:: array_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts the element with the index n from the array.
.. py:function:: array_empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns a boolean indicating whether the array is empty.
.. py:function:: array_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the elements that appear in ``array1`` but not in ``array2``.
.. py:function:: array_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts the element with the index n from the array.
This is an alias for :py:func:`array_element`.
.. py:function:: array_has(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns true if the element appears in the first array, otherwise false.
.. py:function:: array_has_all(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr
Determines if there is complete overlap of ``second_array`` in ``first_array``.
Returns true if each element of the second array appears in the first array.
Otherwise, it returns false.
.. py:function:: array_has_any(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr
Determine if there is an overlap between ``first_array`` and ``second_array``.
Returns true if at least one element of the second array appears in the first
array. Otherwise, it returns false.
.. py:function:: array_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr
Return the position of the first occurrence of ``element`` in ``array``.
This is an alias for :py:func:`array_position`.
.. py:function:: array_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the intersection of ``array1`` and ``array2``.
.. py:function:: array_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts each element to its text representation.
This is an alias for :py:func:`array_to_string`.
.. py:function:: array_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the length of the array.
.. py:function:: array_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the number of dimensions of the array.
.. py:function:: array_pop_back(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the array without the last element.
.. py:function:: array_pop_front(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the array without the first element.
.. py:function:: array_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr
Return the position of the first occurrence of ``element`` in ``array``.
.. py:function:: array_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Searches for an element in the array and returns all occurrences.
.. py:function:: array_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr
Prepends an element to the beginning of an array.
.. py:function:: array_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Appends an element to the end of an array.
This is an alias for :py:func:`array_append`.
.. py:function:: array_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr
Prepends an element to the beginning of an array.
This is an alias for :py:func:`array_prepend`.
.. py:function:: array_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes the first element from the array equal to the given value.
.. py:function:: array_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all elements from the array equal to the given value.
.. py:function:: array_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes the first ``max`` elements from the array equal to the given value.
.. py:function:: array_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array containing ``element`` ``count`` times.
.. py:function:: array_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces the first occurrence of ``from_val`` with ``to_val``.
.. py:function:: array_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces all occurrences of ``from_val`` with ``to_val``.
.. py:function:: array_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr
Replace the first ``max`` occurrences of ``from_val`` with ``to_val``.
Replaces the first ``max`` occurrences of the specified element with another
specified element.
.. py:function:: array_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array with the specified size filled.
If ``size`` is greater than the ``array`` length, the additional entries will
be filled with the given ``value``.
.. py:function:: array_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Returns a slice of the array.
.. py:function:: array_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr
Sort an array.
:param array: The input array to sort.
:param descending: If True, sorts in descending order.
:param null_first: If True, nulls will be returned at the beginning of the array.
.. py:function:: array_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts each element to its text representation.
.. py:function:: array_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array of the elements in the union of array1 and array2.
Duplicate rows will not be returned.
.. py:function:: arrow_cast(expr: datafusion.expr.Expr, data_type: datafusion.expr.Expr) -> datafusion.expr.Expr
Casts an expression to a specified data type.
.. py:function:: arrow_typeof(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the Arrow type of the expression.
.. py:function:: ascii(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the numeric code of the first character of the argument.
.. py:function:: asin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the arc sine or inverse sine of a number.
.. py:function:: asinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns inverse hyperbolic sine.
.. py:function:: atan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns inverse tangent of a number.
.. py:function:: atan2(y: datafusion.expr.Expr, x: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns inverse tangent of a division given in the argument.
.. py:function:: atanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns inverse hyperbolic tangent.
.. py:function:: avg(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the average value.
This aggregate function expects a numeric expression and will return a float.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Values to compute the average of
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: bit_and(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the bitwise AND of the argument.
This aggregate function will bitwise compare every value in the input partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Argument to perform bitwise calculation on
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: bit_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the number of bits in the string argument.
.. py:function:: bit_or(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the bitwise OR of the argument.
This aggregate function will bitwise compare every value in the input partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Argument to perform bitwise calculation on
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: bit_xor(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the bitwise XOR of the argument.
This aggregate function will bitwise compare every value in the input partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by`` and ``null_treatment``.
:param expression: Argument to perform bitwise calculation on
:param distinct: If True, evaluate each unique value of expression only once
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: bool_and(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the boolean AND of the argument.
This aggregate function will compare every value in the input partition. These are
expected to be boolean values.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Argument to perform calculation on
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: bool_or(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the boolean OR of the argument.
This aggregate function will compare every value in the input partition. These are
expected to be boolean values.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: Argument to perform calculation on
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: btrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all characters, spaces by default, from both sides of a string.
.. py:function:: cardinality(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the total number of elements in the array.
.. py:function:: case(expr: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder
Create a case expression.
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
detailed usage.
.. py:function:: cbrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the cube root of a number.
.. py:function:: ceil(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the nearest integer greater than or equal to argument.
.. py:function:: char_length(string: datafusion.expr.Expr) -> datafusion.expr.Expr
The number of characters in the ``string``.
.. py:function:: character_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the number of characters in the argument.
.. py:function:: chr(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts the Unicode code point to a UTF8 character.
.. py:function:: coalesce(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the value of the first expr in ``args`` which is not NULL.
.. py:function:: col(name: str) -> datafusion.expr.Expr
Creates a column reference expression.
.. py:function:: concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the text representations of all the arguments.
NULL arguments are ignored.
.. py:function:: concat_ws(separator: str, *args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the list ``args`` with the separator.
``NULL`` arguments are ignored. ``separator`` should not be ``NULL``.
.. py:function:: corr(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the correlation coefficient between ``value_y`` and ``value_x``.
This aggregate function expects both values to be numeric and will return a float.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param value_y: The dependent variable for correlation
:param value_x: The independent variable for correlation
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: cos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the cosine of the argument.
.. py:function:: cosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the hyperbolic cosine of the argument.
.. py:function:: cot(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the cotangent of the argument.
.. py:function:: count(expressions: datafusion.expr.Expr | list[datafusion.expr.Expr] | None = None, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the number of rows that match the given arguments.
This aggregate function will count the non-null rows provided in the expression.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by`` and ``null_treatment``.
:param expressions: Expression or list of expressions whose non-null rows are counted
:param distinct: If True, a single entry for each distinct value will be in the result
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: count_star(filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Create a COUNT(1) aggregate expression.
This aggregate function will count all of the rows in the partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``distinct``, and ``null_treatment``.
:param filter: If provided, only count rows for which the filter is True
.. py:function:: covar(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample covariance.
This is an alias for :py:func:`covar_samp`.
.. py:function:: covar_pop(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the population covariance.
This aggregate function expects both values to be numeric and will return a float.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param value_y: The dependent variable for covariance
:param value_x: The independent variable for covariance
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: covar_samp(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample covariance.
This aggregate function expects both values to be numeric and will return a float.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param value_y: The dependent variable for covariance
:param value_x: The independent variable for covariance
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: cume_dist(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a cumulative distribution window function.
This window function is similar to :py:func:`rank` except that the returned values
are the ratio of the row number to the total number of rows. Here is an example of a
dataframe with a window ordered by descending ``points`` and the associated
cumulative distribution::
+--------+-----------+
| points | cume_dist |
+--------+-----------+
| 100 | 0.5 |
| 100 | 0.5 |
| 50 | 0.75 |
| 25 | 1.0 |
+--------+-----------+
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: current_date() -> datafusion.expr.Expr
Returns current UTC date as a Date32 value.
.. py:function:: current_time() -> datafusion.expr.Expr
Returns current UTC time as a Time64 value.
.. py:function:: date_bin(stride: datafusion.expr.Expr, source: datafusion.expr.Expr, origin: datafusion.expr.Expr) -> datafusion.expr.Expr
Coerces an arbitrary timestamp to the start of the nearest specified interval.
.. py:function:: date_part(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts a subfield from the date.
.. py:function:: date_trunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr
Truncates the date to a specified level of precision.
.. py:function:: datepart(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr
Return a specified part of a date.
This is an alias for :py:func:`date_part`.
.. py:function:: datetrunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr
Truncates the date to a specified level of precision.
This is an alias for :py:func:`date_trunc`.
.. py:function:: decode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr
Decode ``expr`` using the ``encoding``. ``encoding`` can be base64 or hex.
.. py:function:: degrees(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts the argument from radians to degrees.
.. py:function:: dense_rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a dense_rank window function.
This window function is similar to :py:func:`rank` except that the returned values
will be consecutive. Here is an example of a dataframe with a window ordered by
descending ``points`` and the associated dense rank::
+--------+------------+
| points | dense_rank |
+--------+------------+
| 100 | 1 |
| 100 | 1 |
| 50 | 2 |
| 25 | 3 |
+--------+------------+
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: digest(value: datafusion.expr.Expr, method: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes the binary hash of an expression using the specified algorithm.
Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s,
blake2b, and blake3.
.. py:function:: empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr
This is an alias for :py:func:`array_empty`.
.. py:function:: encode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr
Encode ``expr`` using the ``encoding``. ``encoding`` can be base64 or hex.
.. py:function:: ends_with(arg: datafusion.expr.Expr, suffix: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns true if the string ``arg`` ends with the ``suffix``, false otherwise.
.. py:function:: exp(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the exponential of the argument.
.. py:function:: extract(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts a subfield from the date.
This is an alias for :py:func:`date_part`.
.. py:function:: factorial(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the factorial of the argument.
.. py:function:: find_in_set(string: datafusion.expr.Expr, string_list: datafusion.expr.Expr) -> datafusion.expr.Expr
Find a string in a list of strings.
Returns a value in the range of 1 to N if the string is in the string list
``string_list`` consisting of N substrings.
The string list is a string composed of substrings separated by ``,`` characters.
.. py:function:: first_value(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr
Returns the first value in a group of values.
This aggregate function will return the first value in the partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the option ``distinct``.
:param expression: Argument to return the first value of
:param filter: If provided, only compute against rows for which the filter is True
:param order_by: Set the ordering of the expression to evaluate
:param null_treatment: Assign whether to respect or ignore null values.
.. py:function:: flatten(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Flattens an array of arrays into a single array.
.. py:function:: floor(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the nearest integer less than or equal to the argument.
.. py:function:: from_unixtime(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts an integer to RFC3339 timestamp format string.
.. py:function:: gcd(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the greatest common divisor.
.. py:function:: in_list(arg: datafusion.expr.Expr, values: list[datafusion.expr.Expr], negated: bool = False) -> datafusion.expr.Expr
Returns whether the argument is contained within the list ``values``.
.. py:function:: initcap(string: datafusion.expr.Expr) -> datafusion.expr.Expr
Set the initial letter of each word to capital.
Converts the first letter of each word in ``string`` to uppercase and the remaining
characters to lowercase.
.. py:function:: isnan(expr: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns true if a given number is +NaN or -NaN otherwise returns false.
.. py:function:: iszero(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns true if a given number is +0.0 or -0.0 otherwise returns false.
.. py:function:: lag(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a lag window function.
Lag operation will return the argument that is in the previous shift_offset-th row
in the partition. For example ``lag(col("b"), shift_offset=3, default_value=5)``
will return the 3rd previous value in column ``b``. At the beginning of the
partition, where no values can be returned it will return the default value of 5.
Here is an example of both the ``lag`` and :py:func:`datafusion.functions.lead`
functions on a simple DataFrame::
+--------+------+-----+
| points | lead | lag |
+--------+------+-----+
| 100 | 100 | |
| 100 | 50 | 100 |
| 50 | 25 | 100 |
| 25 | | 50 |
+--------+------+-----+
:param arg: Value to return
:param shift_offset: Number of rows before the current row.
:param default_value: Value to return if the ``shift_offset`` row does not exist.
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: last_value(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr
Returns the last value in a group of values.
This aggregate function will return the last value in the partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the option ``distinct``.
:param expression: Argument to return the last value of
:param filter: If provided, only compute against rows for which the filter is True
:param order_by: Set the ordering of the expression to evaluate
:param null_treatment: Assign whether to respect or ignore null values.
.. py:function:: lcm(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the least common multiple.
.. py:function:: lead(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a lead window function.
Lead operation will return the argument that is in the next shift_offset-th row in
the partition. For example ``lead(col("b"), shift_offset=3, default_value=5)`` will
return the 3rd following value in column ``b``. At the end of the partition, where
no further values can be returned it will return the default value of 5.
Here is an example of both the ``lead`` and :py:func:`datafusion.functions.lag`
functions on a simple DataFrame::
+--------+------+-----+
| points | lead | lag |
+--------+------+-----+
| 100 | 100 | |
| 100 | 50 | 100 |
| 50 | 25 | 100 |
| 25 | | 50 |
+--------+------+-----+
To set window function parameters use the window builder approach described in the
:ref:`window_functions` online documentation.
:param arg: Value to return
:param shift_offset: Number of rows following the current row.
:param default_value: Value to return if the ``shift_offset`` row does not exist.
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: left(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the first ``n`` characters in the ``string``.
.. py:function:: length(string: datafusion.expr.Expr) -> datafusion.expr.Expr
The number of characters in the ``string``.
.. py:function:: levenshtein(string1: datafusion.expr.Expr, string2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the Levenshtein distance between the two given strings.
.. py:function:: list_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Appends an element to the end of an array.
This is an alias for :py:func:`array_append`.
.. py:function:: list_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the input arrays.
This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
.. py:function:: list_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Concatenates the input arrays.
This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
.. py:function:: list_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array of the array's dimensions.
This is an alias for :py:func:`array_dims`.
.. py:function:: list_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns distinct values from the array after removing duplicates.
This is an alias for :py:func:`array_distinct`.
.. py:function:: list_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts the element with the index n from the array.
This is an alias for :py:func:`array_element`.
.. py:function:: list_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the elements that appear in ``array1`` but not in the ``array2``.
This is an alias for :py:func:`array_except`.
.. py:function:: list_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Extracts the element with the index n from the array.
This is an alias for :py:func:`array_element`.
.. py:function:: list_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr
Return the position of the first occurrence of ``element`` in ``array``.
This is an alias for :py:func:`array_position`.
.. py:function:: list_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the intersection of ``array1`` and ``array2``.
This is an alias for :py:func:`array_intersect`.
.. py:function:: list_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts each element to its text representation.
This is an alias for :py:func:`array_to_string`.
.. py:function:: list_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the length of the array.
This is an alias for :py:func:`array_length`.
.. py:function:: list_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the number of dimensions of the array.
This is an alias for :py:func:`array_ndims`.
.. py:function:: list_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr
Return the position of the first occurrence of ``element`` in ``array``.
This is an alias for :py:func:`array_position`.
.. py:function:: list_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Searches for an element in the array and returns all occurrences.
This is an alias for :py:func:`array_positions`.
.. py:function:: list_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr
Prepends an element to the beginning of an array.
This is an alias for :py:func:`array_prepend`.
.. py:function:: list_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Appends an element to the end of an array.
This is an alias for :py:func:`array_append`.
.. py:function:: list_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr
Prepends an element to the beginning of an array.
This is an alias for :py:func:`array_prepend`.
.. py:function:: list_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes the first element from the array equal to the given value.
This is an alias for :py:func:`array_remove`.
.. py:function:: list_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all elements from the array equal to the given value.
This is an alias for :py:func:`array_remove_all`.
.. py:function:: list_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes the first ``max`` elements from the array equal to the given value.
This is an alias for :py:func:`array_remove_n`.
.. py:function:: list_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array containing ``element`` ``count`` times.
This is an alias for :py:func:`array_repeat`.
.. py:function:: list_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces the first occurrence of ``from_val`` with ``to_val``.
This is an alias for :py:func:`array_replace`.
.. py:function:: list_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces all occurrences of ``from_val`` with ``to_val``.
This is an alias for :py:func:`array_replace_all`.
.. py:function:: list_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr
Replace ``max`` occurrences of ``from_val`` with ``to_val``.
Replaces the first ``max`` occurrences of the specified element with another
specified element.
This is an alias for :py:func:`array_replace_n`.
.. py:function:: list_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array with the specified size filled.
If ``size`` is greater than the ``array`` length, the additional entries will be
filled with the given ``value``. This is an alias for :py:func:`array_resize`.
.. py:function:: list_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Returns a slice of the array.
This is an alias for :py:func:`array_slice`.
.. py:function:: list_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr
This is an alias for :py:func:`array_sort`.
.. py:function:: list_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts each element to its text representation.
This is an alias for :py:func:`array_to_string`.
.. py:function:: list_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array of the elements in the union of array1 and array2.
Duplicate rows will not be returned.
This is an alias for :py:func:`array_union`.
.. py:function:: ln(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the natural logarithm (base e) of the argument.
.. py:function:: log(base: datafusion.expr.Expr, num: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the logarithm of a number for a particular ``base``.
.. py:function:: log10(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Base 10 logarithm of the argument.
.. py:function:: log2(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Base 2 logarithm of the argument.
.. py:function:: lower(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string to lowercase.
.. py:function:: lpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Add left padding to a string.
Extends the string to length ``count`` by prepending ``characters`` (a
space by default). If the string is already longer than length then it is
truncated (on the right).
.. py:function:: ltrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all characters, spaces by default, from the beginning of a string.
.. py:function:: make_array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array using the specified input expressions.
.. py:function:: make_date(year: datafusion.expr.Expr, month: datafusion.expr.Expr, day: datafusion.expr.Expr) -> datafusion.expr.Expr
Make a date from year, month and day component parts.
.. py:function:: make_list(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an array using the specified input expressions.
This is an alias for :py:func:`make_array`.
.. py:function:: max(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Aggregate function that returns the maximum value of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The value to find the maximum of
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: md5(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes an MD5 128-bit checksum for a string expression.
.. py:function:: mean(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the average (mean) value of the argument.
This is an alias for :py:func:`avg`.
.. py:function:: median(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the median of a set of numbers.
This aggregate function returns the median value of the expression for the given
aggregate function.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by`` and ``null_treatment``.
:param expression: The value to compute the median of
:param distinct: If True, a single entry for each distinct value will be in the result
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: min(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Returns the minimum value of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The value to find the minimum of
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: named_struct(name_pairs: list[tuple[str, datafusion.expr.Expr]]) -> datafusion.expr.Expr
Returns a struct with the given names and arguments pairs.
.. py:function:: nanvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.
.. py:function:: now() -> datafusion.expr.Expr
Returns the current timestamp in nanoseconds.
This will use the same value for all instances of now() in same statement.
.. py:function:: nth_value(expression: datafusion.expr.Expr, n: int, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr
Returns the n-th value in a group of values.
This aggregate function will return the n-th value in the partition.
If using the builder functions described in :ref:`aggregation` this function ignores
the option ``distinct``.
:param expression: Argument to return the n-th value of
:param n: Index of value to return. Starts at 1.
:param filter: If provided, only compute against rows for which the filter is True
:param order_by: Set the ordering of the expression to evaluate
:param null_treatment: Assign whether to respect or ignore null values.
.. py:function:: ntile(groups: int, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a n-tile window function.
This window function orders the window frame into a given number of groups based on
the ordering criteria. It then returns which group the current row is assigned to.
Here is an example of a dataframe with a window ordered by descending ``points``
and the associated n-tile function::
+--------+-------+
| points | ntile |
+--------+-------+
| 120 | 1 |
| 100 | 1 |
| 80 | 2 |
| 60 | 2 |
| 40 | 3 |
| 20 | 3 |
+--------+-------+
:param groups: Number of groups for the n-tile to be divided into.
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: nullif(expr1: datafusion.expr.Expr, expr2: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns NULL if expr1 equals expr2; otherwise it returns expr1.
This can be used to perform the inverse operation of the COALESCE expression.
.. py:function:: nvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``.
.. py:function:: octet_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the number of bytes of a string.
.. py:function:: order_by(expr: datafusion.expr.Expr, ascending: bool = True, nulls_first: bool = True) -> datafusion.expr.SortExpr
Creates a new sort expression.
.. py:function:: overlay(string: datafusion.expr.Expr, substring: datafusion.expr.Expr, start: datafusion.expr.Expr, length: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Replace a substring with a new substring.
Replace the substring of string that starts at the ``start``'th character and
extends for ``length`` characters with new substring.
.. py:function:: percent_rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a percent_rank window function.
This window function is similar to :py:func:`rank` except that the returned values
are the percentage from 0.0 to 1.0 from first to last. Here is an example of a
dataframe with a window ordered by descending ``points`` and the associated percent
rank::
+--------+--------------+
| points | percent_rank |
+--------+--------------+
| 100 | 0.0 |
| 100 | 0.0 |
| 50 | 0.666667 |
| 25 | 1.0 |
+--------+--------------+
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: pi() -> datafusion.expr.Expr
Returns an approximate value of π.
.. py:function:: pow(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns ``base`` raised to the power of ``exponent``.
This is an alias of :py:func:`power`.
.. py:function:: power(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns ``base`` raised to the power of ``exponent``.
.. py:function:: radians(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts the argument from degrees to radians.
.. py:function:: random() -> datafusion.expr.Expr
Returns a random value in the range ``0.0 <= x < 1.0``.
.. py:function:: range(start: datafusion.expr.Expr, stop: datafusion.expr.Expr, step: datafusion.expr.Expr) -> datafusion.expr.Expr
Create a list of values in the range between start and stop.
.. py:function:: rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a rank window function.
Returns the rank based upon the window order. Consecutive equal values will receive
the same rank, but the next different value will not be consecutive but rather the
number of rows that precede it plus one. This is similar to Olympic medals. If two
people tie for gold, the next place is bronze. There would be no silver medal. Here
is an example of a dataframe with a window ordered by descending ``points`` and the
associated rank.
You should set ``order_by`` to produce meaningful results::
+--------+------+
| points | rank |
+--------+------+
| 100 | 1 |
| 100 | 1 |
| 50 | 3 |
| 25 | 4 |
+--------+------+
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: regexp_count(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, start: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Returns the number of matches in a string.
Optional start position (the first position is 1) to search for the regular
expression.
.. py:function:: regexp_like(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Find if any regular expression (regex) matches exist.
Tests a string using a regular expression returning true if at least one match,
false otherwise.
.. py:function:: regexp_match(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Perform regular expression (regex) matching.
Returns an array with each element containing the leftmost-first match of the
corresponding index in ``regex`` to string in ``string``.
.. py:function:: regexp_replace(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, replacement: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Replaces substring(s) matching a PCRE-like regular expression.
The full list of supported features and syntax can be found at
<https://docs.rs/regex/latest/regex/#syntax>
Supported flags with the addition of 'g' can be found at
<https://docs.rs/regex/latest/regex/#grouping-and-flags>
.. py:function:: regr_avgx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the average of the independent variable ``x``.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_avgy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the average of the dependent variable ``y``.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_count(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Counts the number of rows in which both expressions are not null.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_intercept(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the intercept from the linear regression.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_r2(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the R-squared value from linear regression.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_slope(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the slope from linear regression.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_sxx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sum of squares of the independent variable ``x``.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_sxy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sum of products of pairs of numbers.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: regr_syy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sum of squares of the dependent variable ``y``.
This is a linear regression aggregate function. Only non-null pairs of the inputs
are evaluated.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param y: The linear regression dependent variable
:param x: The linear regression independent variable
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: repeat(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Repeats the ``string`` ``n`` times.
.. py:function:: replace(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``.
.. py:function:: reverse(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Reverse the string argument.
.. py:function:: right(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the last ``n`` characters in the ``string``.
.. py:function:: round(value: datafusion.expr.Expr, decimal_places: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Round the argument to the nearest integer.
If the optional ``decimal_places`` is specified, round to the nearest number of
decimal places. You can specify a negative number of decimal places. For example
``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.
.. py:function:: row_number(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Create a row number window function.
Returns the row number of the window function.
Here is an example of the ``row_number`` on a simple DataFrame::
+--------+------------+
| points | row number |
+--------+------------+
| 100 | 1 |
| 100 | 2 |
| 50 | 3 |
| 25 | 4 |
+--------+------------+
:param partition_by: Expressions to partition the window frame on.
:param order_by: Set ordering within the window frame.
.. py:function:: rpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Add right padding to a string.
Extends the ``string`` to length ``count`` by appending the fill ``characters`` (a
space by default). If the string is already longer than ``count`` then it is truncated.
.. py:function:: rtrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all characters, spaces by default, from the end of a string.
.. py:function:: sha224(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes the SHA-224 hash of a binary string.
.. py:function:: sha256(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes the SHA-256 hash of a binary string.
.. py:function:: sha384(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes the SHA-384 hash of a binary string.
.. py:function:: sha512(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Computes the SHA-512 hash of a binary string.
.. py:function:: signum(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the sign of the argument (-1, 0, +1).
.. py:function:: sin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the sine of the argument.
.. py:function:: sinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the hyperbolic sine of the argument.
.. py:function:: split_part(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, index: datafusion.expr.Expr) -> datafusion.expr.Expr
Split a string and return one part.
Splits a string based on a delimiter and picks out the desired field based
on the index.
.. py:function:: sqrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the square root of the argument.
.. py:function:: starts_with(string: datafusion.expr.Expr, prefix: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns true if string starts with prefix.
.. py:function:: stddev(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the standard deviation of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The value to compute the standard deviation of
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: stddev_pop(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the population standard deviation of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The value to compute the population standard deviation of
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: stddev_samp(arg: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample standard deviation of the argument.
This is an alias for :py:func:`stddev`.
.. py:function:: string_agg(expression: datafusion.expr.Expr, delimiter: str, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr
Concatenates the input strings.
This aggregate function will concatenate input strings, ignoring null values, and
separating them with the specified delimiter. Non-string values will be converted to
their string equivalents.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``distinct`` and ``null_treatment``.
:param expression: The values to concatenate into a single string
:param delimiter: Text to place between each value of expression
:param filter: If provided, only compute against rows for which the filter is True
:param order_by: Set the ordering of the expression to evaluate
.. py:function:: strpos(string: datafusion.expr.Expr, substring: datafusion.expr.Expr) -> datafusion.expr.Expr
Finds the position from where the ``substring`` matches the ``string``.
.. py:function:: struct(*args: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns a struct with the given arguments.
.. py:function:: substr(string: datafusion.expr.Expr, position: datafusion.expr.Expr) -> datafusion.expr.Expr
Substring from the ``position`` to the end.
.. py:function:: substr_index(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns an indexed substring.
The return will be the ``string`` from before ``count`` occurrences of
``delimiter``.
.. py:function:: substring(string: datafusion.expr.Expr, position: datafusion.expr.Expr, length: datafusion.expr.Expr) -> datafusion.expr.Expr
Substring from the ``position`` with ``length`` characters.
.. py:function:: sum(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sum of a set of numbers.
This aggregate function expects a numeric expression.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The numeric values to sum
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: tan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the tangent of the argument.
.. py:function:: tanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Returns the hyperbolic tangent of the argument.
.. py:function:: to_hex(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts an integer to a hexadecimal string.
.. py:function:: to_timestamp(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package:
`chrono strftime documentation <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`__.
.. py:function:: to_timestamp_micros(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a ``Timestamp`` in microseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
.. py:function:: to_timestamp_millis(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a ``Timestamp`` in milliseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
.. py:function:: to_timestamp_nanos(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
.. py:function:: to_timestamp_seconds(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a ``Timestamp`` in seconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
.. py:function:: to_unixtime(string: datafusion.expr.Expr, *format_arguments: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string and optional formats to a Unixtime.
.. py:function:: translate(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr
Replaces the characters in ``from_val`` with the counterpart in ``to_val``.
.. py:function:: trim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Removes all characters, spaces by default, from both sides of a string.
.. py:function:: trunc(num: datafusion.expr.Expr, precision: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr
Truncate the number toward zero with optional precision.
.. py:function:: upper(arg: datafusion.expr.Expr) -> datafusion.expr.Expr
Converts a string to uppercase.
.. py:function:: uuid() -> datafusion.expr.Expr
Returns uuid v4 as a string value.
.. py:function:: var(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample variance of the argument.
This is an alias for :py:func:`var_samp`.
.. py:function:: var_pop(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the population variance of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The variable to compute the variance for
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: var_samp(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample variance of the argument.
If using the builder functions described in :ref:`aggregation` this function ignores
the options ``order_by``, ``null_treatment``, and ``distinct``.
:param expression: The variable to compute the variance for
:param filter: If provided, only compute against rows for which the filter is True
.. py:function:: var_sample(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr
Computes the sample variance of the argument.
This is an alias for :py:func:`var_samp`.
.. py:function:: when(when: datafusion.expr.Expr, then: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder
Create a case expression that has no base expression.
Create a :py:class:`~datafusion.expr.CaseBuilder` that starts with the given
``when``/``then`` pair. See :py:class:`~datafusion.expr.CaseBuilder` for
detailed usage.
.. py:function:: window(name: str, args: list[datafusion.expr.Expr], partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr | None = None, window_frame: datafusion.expr.WindowFrame | None = None, ctx: datafusion.context.SessionContext | None = None) -> datafusion.expr.Expr
Creates a new Window function expression.
This interface will soon be deprecated. Instead of using this interface,
users should call the window functions directly. For example, to perform a
lag use::
df.select(functions.lag(col("a")).partition_by(col("b")).build())