| datafusion.functions |
| ==================== |
| |
| .. py:module:: datafusion.functions |
| |
| .. autoapi-nested-parse:: |
| |
| User functions for operating on :py:class:`~datafusion.expr.Expr`. |
| |
| |
| |
| Functions |
| --------- |
| |
| .. autoapisummary:: |
| |
| datafusion.functions.abs |
| datafusion.functions.acos |
| datafusion.functions.acosh |
| datafusion.functions.alias |
| datafusion.functions.approx_distinct |
| datafusion.functions.approx_median |
| datafusion.functions.approx_percentile_cont |
| datafusion.functions.approx_percentile_cont_with_weight |
| datafusion.functions.array |
| datafusion.functions.array_agg |
| datafusion.functions.array_append |
| datafusion.functions.array_cat |
| datafusion.functions.array_concat |
| datafusion.functions.array_dims |
| datafusion.functions.array_distinct |
| datafusion.functions.array_element |
| datafusion.functions.array_empty |
| datafusion.functions.array_except |
| datafusion.functions.array_extract |
| datafusion.functions.array_has |
| datafusion.functions.array_has_all |
| datafusion.functions.array_has_any |
| datafusion.functions.array_indexof |
| datafusion.functions.array_intersect |
| datafusion.functions.array_join |
| datafusion.functions.array_length |
| datafusion.functions.array_ndims |
| datafusion.functions.array_pop_back |
| datafusion.functions.array_pop_front |
| datafusion.functions.array_position |
| datafusion.functions.array_positions |
| datafusion.functions.array_prepend |
| datafusion.functions.array_push_back |
| datafusion.functions.array_push_front |
| datafusion.functions.array_remove |
| datafusion.functions.array_remove_all |
| datafusion.functions.array_remove_n |
| datafusion.functions.array_repeat |
| datafusion.functions.array_replace |
| datafusion.functions.array_replace_all |
| datafusion.functions.array_replace_n |
| datafusion.functions.array_resize |
| datafusion.functions.array_slice |
| datafusion.functions.array_sort |
| datafusion.functions.array_to_string |
| datafusion.functions.array_union |
| datafusion.functions.arrow_cast |
| datafusion.functions.arrow_typeof |
| datafusion.functions.ascii |
| datafusion.functions.asin |
| datafusion.functions.asinh |
| datafusion.functions.atan |
| datafusion.functions.atan2 |
| datafusion.functions.atanh |
| datafusion.functions.avg |
| datafusion.functions.bit_and |
| datafusion.functions.bit_length |
| datafusion.functions.bit_or |
| datafusion.functions.bit_xor |
| datafusion.functions.bool_and |
| datafusion.functions.bool_or |
| datafusion.functions.btrim |
| datafusion.functions.cardinality |
| datafusion.functions.case |
| datafusion.functions.cbrt |
| datafusion.functions.ceil |
| datafusion.functions.char_length |
| datafusion.functions.character_length |
| datafusion.functions.chr |
| datafusion.functions.coalesce |
| datafusion.functions.col |
| datafusion.functions.concat |
| datafusion.functions.concat_ws |
| datafusion.functions.corr |
| datafusion.functions.cos |
| datafusion.functions.cosh |
| datafusion.functions.cot |
| datafusion.functions.count |
| datafusion.functions.count_star |
| datafusion.functions.covar |
| datafusion.functions.covar_pop |
| datafusion.functions.covar_samp |
| datafusion.functions.cume_dist |
| datafusion.functions.current_date |
| datafusion.functions.current_time |
| datafusion.functions.date_bin |
| datafusion.functions.date_part |
| datafusion.functions.date_trunc |
| datafusion.functions.datepart |
| datafusion.functions.datetrunc |
| datafusion.functions.decode |
| datafusion.functions.degrees |
| datafusion.functions.dense_rank |
| datafusion.functions.digest |
| datafusion.functions.empty |
| datafusion.functions.encode |
| datafusion.functions.ends_with |
| datafusion.functions.exp |
| datafusion.functions.extract |
| datafusion.functions.factorial |
| datafusion.functions.find_in_set |
| datafusion.functions.first_value |
| datafusion.functions.flatten |
| datafusion.functions.floor |
| datafusion.functions.from_unixtime |
| datafusion.functions.gcd |
| datafusion.functions.in_list |
| datafusion.functions.initcap |
| datafusion.functions.isnan |
| datafusion.functions.iszero |
| datafusion.functions.lag |
| datafusion.functions.last_value |
| datafusion.functions.lcm |
| datafusion.functions.lead |
| datafusion.functions.left |
| datafusion.functions.length |
| datafusion.functions.levenshtein |
| datafusion.functions.list_append |
| datafusion.functions.list_cat |
| datafusion.functions.list_concat |
| datafusion.functions.list_dims |
| datafusion.functions.list_distinct |
| datafusion.functions.list_element |
| datafusion.functions.list_except |
| datafusion.functions.list_extract |
| datafusion.functions.list_indexof |
| datafusion.functions.list_intersect |
| datafusion.functions.list_join |
| datafusion.functions.list_length |
| datafusion.functions.list_ndims |
| datafusion.functions.list_position |
| datafusion.functions.list_positions |
| datafusion.functions.list_prepend |
| datafusion.functions.list_push_back |
| datafusion.functions.list_push_front |
| datafusion.functions.list_remove |
| datafusion.functions.list_remove_all |
| datafusion.functions.list_remove_n |
| datafusion.functions.list_repeat |
| datafusion.functions.list_replace |
| datafusion.functions.list_replace_all |
| datafusion.functions.list_replace_n |
| datafusion.functions.list_resize |
| datafusion.functions.list_slice |
| datafusion.functions.list_sort |
| datafusion.functions.list_to_string |
| datafusion.functions.list_union |
| datafusion.functions.ln |
| datafusion.functions.log |
| datafusion.functions.log10 |
| datafusion.functions.log2 |
| datafusion.functions.lower |
| datafusion.functions.lpad |
| datafusion.functions.ltrim |
| datafusion.functions.make_array |
| datafusion.functions.make_date |
| datafusion.functions.make_list |
| datafusion.functions.max |
| datafusion.functions.md5 |
| datafusion.functions.mean |
| datafusion.functions.median |
| datafusion.functions.min |
| datafusion.functions.named_struct |
| datafusion.functions.nanvl |
| datafusion.functions.now |
| datafusion.functions.nth_value |
| datafusion.functions.ntile |
| datafusion.functions.nullif |
| datafusion.functions.nvl |
| datafusion.functions.octet_length |
| datafusion.functions.order_by |
| datafusion.functions.overlay |
| datafusion.functions.percent_rank |
| datafusion.functions.pi |
| datafusion.functions.pow |
| datafusion.functions.power |
| datafusion.functions.radians |
| datafusion.functions.random |
| datafusion.functions.range |
| datafusion.functions.rank |
| datafusion.functions.regexp_count |
| datafusion.functions.regexp_like |
| datafusion.functions.regexp_match |
| datafusion.functions.regexp_replace |
| datafusion.functions.regr_avgx |
| datafusion.functions.regr_avgy |
| datafusion.functions.regr_count |
| datafusion.functions.regr_intercept |
| datafusion.functions.regr_r2 |
| datafusion.functions.regr_slope |
| datafusion.functions.regr_sxx |
| datafusion.functions.regr_sxy |
| datafusion.functions.regr_syy |
| datafusion.functions.repeat |
| datafusion.functions.replace |
| datafusion.functions.reverse |
| datafusion.functions.right |
| datafusion.functions.round |
| datafusion.functions.row_number |
| datafusion.functions.rpad |
| datafusion.functions.rtrim |
| datafusion.functions.sha224 |
| datafusion.functions.sha256 |
| datafusion.functions.sha384 |
| datafusion.functions.sha512 |
| datafusion.functions.signum |
| datafusion.functions.sin |
| datafusion.functions.sinh |
| datafusion.functions.split_part |
| datafusion.functions.sqrt |
| datafusion.functions.starts_with |
| datafusion.functions.stddev |
| datafusion.functions.stddev_pop |
| datafusion.functions.stddev_samp |
| datafusion.functions.string_agg |
| datafusion.functions.strpos |
| datafusion.functions.struct |
| datafusion.functions.substr |
| datafusion.functions.substr_index |
| datafusion.functions.substring |
| datafusion.functions.sum |
| datafusion.functions.tan |
| datafusion.functions.tanh |
| datafusion.functions.to_hex |
| datafusion.functions.to_timestamp |
| datafusion.functions.to_timestamp_micros |
| datafusion.functions.to_timestamp_millis |
| datafusion.functions.to_timestamp_nanos |
| datafusion.functions.to_timestamp_seconds |
| datafusion.functions.to_unixtime |
| datafusion.functions.translate |
| datafusion.functions.trim |
| datafusion.functions.trunc |
| datafusion.functions.upper |
| datafusion.functions.uuid |
| datafusion.functions.var |
| datafusion.functions.var_pop |
| datafusion.functions.var_samp |
| datafusion.functions.var_sample |
| datafusion.functions.when |
| datafusion.functions.window |
| |
| |
| Module Contents |
| --------------- |
| |
| .. py:function:: abs(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Return the absolute value of a given number. |
| |
| Returns: |
| -------- |
| Expr |
| A new expression representing the absolute value of the input expression. |
| |
| |
| .. py:function:: acos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the arc cosine or inverse cosine of a number. |
| |
| Returns: |
| -------- |
| Expr |
| A new expression representing the arc cosine of the input expression. |
| |
| |
| .. py:function:: acosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns inverse hyperbolic cosine. |
| |
| |
| .. py:function:: alias(expr: datafusion.expr.Expr, name: str, metadata: Optional[dict[str, str]] = None) -> datafusion.expr.Expr |
| |
| Creates an alias expression with an optional metadata dictionary. |
| |
| :param expr: The expression to alias |
| :param name: The alias name |
| :param metadata: Optional metadata to attach to the column |
| |
| :returns: An expression with the given alias |
| |
| |
| .. py:function:: approx_distinct(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the approximate number of distinct values. |
| |
| This aggregate function is similar to :py:func:`count` with distinct set, but it |
| will approximate the number of distinct entries. It may return significantly faster |
| than :py:func:`count` for some DataFrames. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Values to check for distinct entries |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: approx_median(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the approximate median value. |
| |
| This aggregate function is similar to :py:func:`median`, but it will only |
| approximate the median. It may return significantly faster for some DataFrames. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Values to find the median for |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: approx_percentile_cont(expression: datafusion.expr.Expr, percentile: float, num_centroids: Optional[int] = None, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the value that is approximately at a given percentile of ``expression``. |
| |
| This aggregate function assumes the input values form a continuous distribution. |
| Suppose you have a DataFrame which consists of 100 different test scores. If you |
| called this function with a percentile of 0.9, it would return the value of the |
| test score that is above 90% of the other test scores. The returned value may be |
| between two of the values. |
| |
| This function uses the `t-digest <https://arxiv.org/abs/1902.04023>`_ algorithm to |
| compute the percentile. You can limit the number of bins used in this algorithm by |
| setting the ``num_centroids`` parameter. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Values for which to find the approximate percentile |
| :param percentile: This must be between 0.0 and 1.0, inclusive |
| :param num_centroids: Max bin size for the t-digest algorithm |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: approx_percentile_cont_with_weight(expression: datafusion.expr.Expr, weight: datafusion.expr.Expr, percentile: float, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the value of the weighted approximate percentile. |
| |
| This aggregate function is similar to :py:func:`approx_percentile_cont` except that |
| it uses the associated weights. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Values for which to find the approximate percentile |
| :param weight: Relative weight for each of the values in ``expression`` |
| :param percentile: This must be between 0.0 and 1.0, inclusive |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array using the specified input expressions. |
| |
| This is an alias for :py:func:`make_array`. |
| |
| |
| .. py:function:: array_agg(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Aggregate values into an array. |
| |
| Currently ``distinct`` and ``order_by`` cannot be used together. As a workaround, |
| consider :py:func:`array_sort` after aggregation. |
| `Issue Tracker <https://github.com/apache/datafusion/issues/12371>`_ |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the option ``null_treatment``. |
| |
| :param expression: Values to combine into an array |
| :param distinct: If True, a single entry for each distinct value will be in the result |
| :param filter: If provided, only compute against rows for which the filter is True |
| :param order_by: Order the resultant array values |
| |
| |
| .. py:function:: array_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Appends an element to the end of an array. |
| |
| |
| .. py:function:: array_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the input arrays. |
| |
| This is an alias for :py:func:`array_concat`. |
| |
| |
| .. py:function:: array_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the input arrays. |
| |
| |
| .. py:function:: array_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array of the array's dimensions. |
| |
| |
| .. py:function:: array_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns distinct values from the array after removing duplicates. |
| |
| |
| .. py:function:: array_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts the element with the index n from the array. |
| |
| |
| .. py:function:: array_empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns a boolean indicating whether the array is empty. |
| |
| |
| .. py:function:: array_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the elements that appear in ``array1`` but not in ``array2``. |
| |
| |
| .. py:function:: array_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts the element with the index n from the array. |
| |
| This is an alias for :py:func:`array_element`. |
| |
| |
| .. py:function:: array_has(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns true if the element appears in the first array, otherwise false. |
| |
| |
| .. py:function:: array_has_all(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Determines if there is complete overlap of ``second_array`` in ``first_array``. |
| |
| Returns true if each element of the second array appears in the first array. |
| Otherwise, it returns false. |
| |
| |
| .. py:function:: array_has_any(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Determine if there is an overlap between ``first_array`` and ``second_array``. |
| |
| Returns true if at least one element of the second array appears in the first |
| array. Otherwise, it returns false. |
| |
| |
| .. py:function:: array_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr |
| |
| Return the position of the first occurrence of ``element`` in ``array``. |
| |
| This is an alias for :py:func:`array_position`. |
| |
| |
| .. py:function:: array_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the intersection of ``array1`` and ``array2``. |
| |
| |
| .. py:function:: array_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts each element to its text representation. |
| |
| This is an alias for :py:func:`array_to_string`. |
| |
| |
| .. py:function:: array_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the length of the array. |
| |
| |
| .. py:function:: array_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the number of dimensions of the array. |
| |
| |
| .. py:function:: array_pop_back(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the array without the last element. |
| |
| |
| .. py:function:: array_pop_front(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the array without the first element. |
| |
| |
| .. py:function:: array_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr |
| |
| Return the position of the first occurrence of ``element`` in ``array``. |
| |
| |
| .. py:function:: array_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Searches for an element in the array and returns all occurrences. |
| |
| |
| .. py:function:: array_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Prepends an element to the beginning of an array. |
| |
| |
| .. py:function:: array_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Appends an element to the end of an array. |
| |
| This is an alias for :py:func:`array_append`. |
| |
| |
| .. py:function:: array_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Prepends an element to the beginning of an array. |
| |
| This is an alias for :py:func:`array_prepend`. |
| |
| |
| .. py:function:: array_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes the first element from the array equal to the given value. |
| |
| |
| .. py:function:: array_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all elements from the array equal to the given value. |
| |
| |
| .. py:function:: array_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes the first ``max`` elements from the array equal to the given value. |
| |
| |
| .. py:function:: array_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array containing ``element`` ``count`` times. |
| |
| |
| .. py:function:: array_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces the first occurrence of ``from_val`` with ``to_val``. |
| |
| |
| .. py:function:: array_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces all occurrences of ``from_val`` with ``to_val``. |
| |
| |
| .. py:function:: array_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replace up to ``max`` occurrences of ``from_val`` with ``to_val``. |
| |
| Replaces the first ``max`` occurrences of the specified element with another |
| specified element. |
| |
| |
| .. py:function:: array_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array with the specified size filled. |
| |
| If ``size`` is greater than the ``array`` length, the additional entries will |
| be filled with the given ``value``. |
| |
| |
| .. py:function:: array_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Returns a slice of the array. |
| |
| |
| .. py:function:: array_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr |
| |
| Sort an array. |
| |
| :param array: The input array to sort. |
| :param descending: If True, sorts in descending order. |
| :param null_first: If True, nulls will be returned at the beginning of the array. |
| |
| |
| .. py:function:: array_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts each element to its text representation. |
| |
| |
| .. py:function:: array_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array of the elements in the union of array1 and array2. |
| |
| Duplicate rows will not be returned. |
| |
| |
| .. py:function:: arrow_cast(expr: datafusion.expr.Expr, data_type: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Casts an expression to a specified data type. |
| |
| |
| .. py:function:: arrow_typeof(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the Arrow type of the expression. |
| |
| |
| .. py:function:: ascii(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the numeric code of the first character of the argument. |
| |
| |
| .. py:function:: asin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the arc sine or inverse sine of a number. |
| |
| |
| .. py:function:: asinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns inverse hyperbolic sine. |
| |
| |
| .. py:function:: atan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns inverse tangent of a number. |
| |
| |
| .. py:function:: atan2(y: datafusion.expr.Expr, x: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns inverse tangent of a division given in the argument. |
| |
| |
| .. py:function:: atanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns inverse hyperbolic tangent. |
| |
| |
| .. py:function:: avg(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the average value. |
| |
| This aggregate function expects a numeric expression and will return a float. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Numeric values to average |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: bit_and(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the bitwise AND of the argument. |
| |
| This aggregate function will bitwise compare every value in the input partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Argument to perform bitwise calculation on |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: bit_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the number of bits in the string argument. |
| |
| |
| .. py:function:: bit_or(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the bitwise OR of the argument. |
| |
| This aggregate function will bitwise compare every value in the input partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Argument to perform bitwise calculation on |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: bit_xor(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the bitwise XOR of the argument. |
| |
| This aggregate function will bitwise compare every value in the input partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by`` and ``null_treatment``. |
| |
| :param expression: Argument to perform bitwise calculation on |
| :param distinct: If True, evaluate each unique value of expression only once |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: bool_and(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the boolean AND of the argument. |
| |
| This aggregate function will compare every value in the input partition. These are |
| expected to be boolean values. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Argument to perform calculation on |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: bool_or(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the boolean OR of the argument. |
| |
| This aggregate function will compare every value in the input partition. These are |
| expected to be boolean values. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: Argument to perform calculation on |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: btrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all characters, spaces by default, from both sides of a string. |
| |
| |
| .. py:function:: cardinality(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the total number of elements in the array. |
| |
| |
| .. py:function:: case(expr: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder |
| |
| Create a case expression. |
| |
| Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the |
| expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for |
| detailed usage. |
| |
| |
| .. py:function:: cbrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the cube root of a number. |
| |
| |
| .. py:function:: ceil(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the nearest integer greater than or equal to argument. |
| |
| |
| .. py:function:: char_length(string: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| The number of characters in the ``string``. |
| |
| |
| .. py:function:: character_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the number of characters in the argument. |
| |
| |
| .. py:function:: chr(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts the Unicode code point to a UTF8 character. |
| |
| |
| .. py:function:: coalesce(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the value of the first expr in ``args`` which is not NULL. |
| |
| |
| .. py:function:: col(name: str) -> datafusion.expr.Expr |
| |
| Creates a column reference expression. |
| |
| |
| .. py:function:: concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the text representations of all the arguments. |
| |
| NULL arguments are ignored. |
| |
| |
| .. py:function:: concat_ws(separator: str, *args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the list ``args`` with the separator. |
| |
| ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. |
| |
| |
| .. py:function:: corr(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the correlation coefficient between ``value_y`` and ``value_x``. |
| |
| This aggregate function expects both values to be numeric and will return a float. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param value_y: The dependent variable for correlation |
| :param value_x: The independent variable for correlation |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: cos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the cosine of the argument. |
| |
| |
| .. py:function:: cosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the hyperbolic cosine of the argument. |
| |
| |
| .. py:function:: cot(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the cotangent of the argument. |
| |
| |
| .. py:function:: count(expressions: datafusion.expr.Expr | list[datafusion.expr.Expr] | None = None, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the number of rows that match the given arguments. |
| |
| This aggregate function will count the non-null rows provided in the expression. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by`` and ``null_treatment``. |
| |
| :param expressions: Expression or list of expressions whose non-null rows are counted |
| :param distinct: If True, a single entry for each distinct value will be in the result |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: count_star(filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Create a COUNT(1) aggregate expression. |
| |
| This aggregate function will count all of the rows in the partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``distinct``, and ``null_treatment``. |
| |
| :param filter: If provided, only count rows for which the filter is True |
| |
| |
| .. py:function:: covar(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample covariance. |
| |
| This is an alias for :py:func:`covar_samp`. |
| |
| |
| .. py:function:: covar_pop(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the population covariance. |
| |
| This aggregate function expects both values to be numeric and will return a float. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param value_y: The dependent variable for covariance |
| :param value_x: The independent variable for covariance |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: covar_samp(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample covariance. |
| |
| This aggregate function expects both values to be numeric and will return a float. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param value_y: The dependent variable for covariance |
| :param value_x: The independent variable for covariance |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: cume_dist(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a cumulative distribution window function. |
| |
| This window function is similar to :py:func:`rank` except that the returned values |
| are the ratio of the row number to the total number of rows. Here is an example of a |
| dataframe with a window ordered by descending ``points`` and the associated |
| cumulative distribution:: |
| |
| +--------+-----------+ |
| | points | cume_dist | |
| +--------+-----------+ |
| | 100 | 0.5 | |
| | 100 | 0.5 | |
| | 50 | 0.75 | |
| | 25 | 1.0 | |
| +--------+-----------+ |
| |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: current_date() -> datafusion.expr.Expr |
| |
| Returns current UTC date as a Date32 value. |
| |
| |
| .. py:function:: current_time() -> datafusion.expr.Expr |
| |
| Returns current UTC time as a Time64 value. |
| |
| |
| .. py:function:: date_bin(stride: datafusion.expr.Expr, source: datafusion.expr.Expr, origin: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Coerces an arbitrary timestamp to the start of the nearest specified interval. |
| |
| |
| .. py:function:: date_part(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts a subfield from the date. |
| |
| |
| .. py:function:: date_trunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Truncates the date to a specified level of precision. |
| |
| |
| .. py:function:: datepart(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Return a specified part of a date. |
| |
| This is an alias for :py:func:`date_part`. |
| |
| |
| .. py:function:: datetrunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Truncates the date to a specified level of precision. |
| |
| This is an alias for :py:func:`date_trunc`. |
| |
| |
| .. py:function:: decode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Decode the ``expr``, using the ``encoding``. encoding can be base64 or hex. |
| |
| |
| .. py:function:: degrees(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts the argument from radians to degrees. |
| |
| |
| .. py:function:: dense_rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a dense_rank window function. |
| |
| This window function is similar to :py:func:`rank` except that the returned values |
| will be consecutive. Here is an example of a dataframe with a window ordered by |
| descending ``points`` and the associated dense rank:: |
| |
| +--------+------------+ |
| | points | dense_rank | |
| +--------+------------+ |
| | 100 | 1 | |
| | 100 | 1 | |
| | 50 | 2 | |
| | 25 | 3 | |
| +--------+------------+ |
| |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: digest(value: datafusion.expr.Expr, method: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes the binary hash of an expression using the specified algorithm. |
| |
| Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, |
| blake2b, and blake3. |
| |
| |
| .. py:function:: empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| This is an alias for :py:func:`array_empty`. |
| |
| |
| .. py:function:: encode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Encode the ``expr``, using the ``encoding``. encoding can be base64 or hex. |
| |
| |
| .. py:function:: ends_with(arg: datafusion.expr.Expr, suffix: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns true if the ``arg`` ends with the ``suffix``, false otherwise. |
| |
| |
| .. py:function:: exp(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the exponential of the argument. |
| |
| |
| .. py:function:: extract(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts a subfield from the date. |
| |
| This is an alias for :py:func:`date_part`. |
| |
| |
| .. py:function:: factorial(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the factorial of the argument. |
| |
| |
| .. py:function:: find_in_set(string: datafusion.expr.Expr, string_list: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Find a string in a list of strings. |
| |
| Returns a value in the range of 1 to N if the string is in the string list |
| ``string_list`` consisting of N substrings. |
| |
| The string list is a string composed of substrings separated by ``,`` characters. |
| |
| |
| .. py:function:: first_value(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr |
| |
| Returns the first value in a group of values. |
| |
| This aggregate function will return the first value in the partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the option ``distinct``. |
| |
| :param expression: Argument to return the first value of |
| :param filter: If provided, only compute against rows for which the filter is True |
| :param order_by: Set the ordering of the expression to evaluate |
| :param null_treatment: Assign whether to respect or ignore null values. |
| |
| |
| .. py:function:: flatten(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Flattens an array of arrays into a single array. |
| |
| |
| .. py:function:: floor(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the nearest integer less than or equal to the argument. |
| |
| |
| .. py:function:: from_unixtime(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts an integer to RFC3339 timestamp format string. |
| |
| |
| .. py:function:: gcd(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the greatest common divisor. |
| |
| |
| .. py:function:: in_list(arg: datafusion.expr.Expr, values: list[datafusion.expr.Expr], negated: bool = False) -> datafusion.expr.Expr |
| |
| Returns whether the argument is contained within the list ``values``. |
| |
| |
| .. py:function:: initcap(string: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Set the initial letter of each word to capital. |
| |
| Converts the first letter of each word in ``string`` to uppercase and the remaining |
| characters to lowercase. |
| |
| |
| .. py:function:: isnan(expr: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns true if a given number is +NaN or -NaN otherwise returns false. |
| |
| |
| .. py:function:: iszero(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns true if a given number is +0.0 or -0.0 otherwise returns false. |
| |
| |
| .. py:function:: lag(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a lag window function. |
| |
| Lag operation will return the argument that is in the previous shift_offset-th row |
| in the partition. For example ``lag(col("b"), shift_offset=3, default_value=5)`` |
| will return the 3rd previous value in column ``b``. At the beginning of the |
| partition, where no values can be returned it will return the default value of 5. |
| |
| Here is an example of both the ``lag`` and :py:func:`datafusion.functions.lead` |
| functions on a simple DataFrame:: |
| |
| +--------+------+-----+ |
| | points | lead | lag | |
| +--------+------+-----+ |
| | 100 | 100 | | |
| | 100 | 50 | 100 | |
| | 50 | 25 | 100 | |
| | 25 | | 50 | |
| +--------+------+-----+ |
| |
| :param arg: Value to return |
| :param shift_offset: Number of rows before the current row. |
| :param default_value: Value to return if shift_offset row does not exist. |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: last_value(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr |
| |
| Returns the last value in a group of values. |
| |
| This aggregate function will return the last value in the partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the option ``distinct``. |
| |
| :param expression: Argument to return the last value of |
| :param filter: If provided, only compute against rows for which the filter is True |
| :param order_by: Set the ordering of the expression to evaluate |
| :param null_treatment: Assign whether to respect or ignore null values. |
| |
| |
| .. py:function:: lcm(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the least common multiple. |
| |
| |
| .. py:function:: lead(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Optional[Any] = None, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a lead window function. |
| |
| Lead operation will return the argument that is in the next shift_offset-th row in |
| the partition. For example ``lead(col("b"), shift_offset=3, default_value=5)`` will |
| return the 3rd following value in column ``b``. At the end of the partition, where |
| no further values can be returned it will return the default value of 5. |
| |
| Here is an example of both the ``lead`` and :py:func:`datafusion.functions.lag` |
| functions on a simple DataFrame:: |
| |
| +--------+------+-----+ |
| | points | lead | lag | |
| +--------+------+-----+ |
| | 100 | 100 | | |
| | 100 | 50 | 100 | |
| | 50 | 25 | 100 | |
| | 25 | | 50 | |
| +--------+------+-----+ |
| |
| To set window function parameters use the window builder approach described in the |
| :ref:`window_functions` online documentation. |
| |
| :param arg: Value to return |
| :param shift_offset: Number of rows following the current row. |
| :param default_value: Value to return if shift_offset row does not exist. |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: left(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the first ``n`` characters in the ``string``. |
| |
| |
| .. py:function:: length(string: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| The number of characters in the ``string``. |
| |
| |
| .. py:function:: levenshtein(string1: datafusion.expr.Expr, string2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the Levenshtein distance between the two given strings. |
| |
| |
| .. py:function:: list_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Appends an element to the end of an array. |
| |
| This is an alias for :py:func:`array_append`. |
| |
| |
| .. py:function:: list_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the input arrays. |
| |
| This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. |
| |
| |
| .. py:function:: list_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Concatenates the input arrays. |
| |
| This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. |
| |
| |
| .. py:function:: list_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array of the array's dimensions. |
| |
| This is an alias for :py:func:`array_dims`. |
| |
| |
| .. py:function:: list_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns distinct values from the array after removing duplicates. |
| |
| This is an alias for :py:func:`array_distinct`. |
| |
| |
| .. py:function:: list_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts the element with the index n from the array. |
| |
| This is an alias for :py:func:`array_element`. |
| |
| |
| .. py:function:: list_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the elements that appear in ``array1`` but not in the ``array2``. |
| |
| This is an alias for :py:func:`array_except`. |
| |
| |
| .. py:function:: list_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Extracts the element with the index n from the array. |
| |
| This is an alias for :py:func:`array_element`. |
| |
| |
| .. py:function:: list_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr |
| |
| Return the position of the first occurrence of ``element`` in ``array``. |
| |
| This is an alias for :py:func:`array_position`. |
| |
| |
| .. py:function:: list_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the intersection of ``array1`` and ``array2``. |
| |
| This is an alias for :py:func:`array_intersect`. |
| |
| |
| .. py:function:: list_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts each element to its text representation. |
| |
| This is an alias for :py:func:`array_to_string`. |
| |
| |
| .. py:function:: list_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the length of the array. |
| |
| This is an alias for :py:func:`array_length`. |
| |
| |
| .. py:function:: list_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the number of dimensions of the array. |
| |
| This is an alias for :py:func:`array_ndims`. |
| |
| |
| .. py:function:: list_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr |
| |
| Return the position of the first occurrence of ``element`` in ``array``. |
| |
| This is an alias for :py:func:`array_position`. |
| |
| |
| .. py:function:: list_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Searches for an element in the array and returns all occurrences. |
| |
| This is an alias for :py:func:`array_positions`. |
| |
| |
| .. py:function:: list_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Prepends an element to the beginning of an array. |
| |
| This is an alias for :py:func:`array_prepend`. |
| |
| |
| .. py:function:: list_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Appends an element to the end of an array. |
| |
| This is an alias for :py:func:`array_append`. |
| |
| |
| .. py:function:: list_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Prepends an element to the beginning of an array. |
| |
| This is an alias for :py:func:`array_prepend`. |
| |
| |
| .. py:function:: list_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes the first element from the array equal to the given value. |
| |
| This is an alias for :py:func:`array_remove`. |
| |
| |
| .. py:function:: list_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all elements from the array equal to the given value. |
| |
| This is an alias for :py:func:`array_remove_all`. |
| |
| |
| .. py:function:: list_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes the first ``max`` elements from the array equal to the given value. |
| |
| This is an alias for :py:func:`array_remove_n`. |
| |
| |
| .. py:function:: list_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array containing ``element`` ``count`` times. |
| |
| This is an alias for :py:func:`array_repeat`. |
| |
| |
| .. py:function:: list_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces the first occurrence of ``from_val`` with ``to_val``. |
| |
| This is an alias for :py:func:`array_replace`. |
| |
| |
| .. py:function:: list_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces all occurrences of ``from_val`` with ``to_val``. |
| |
| This is an alias for :py:func:`array_replace_all`. |
| |
| |
| .. py:function:: list_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replace ``max`` occurrences of ``from_val`` with ``to_val``. |
| |
| Replaces the first ``max`` occurrences of the specified element with another |
| specified element. |
| |
| This is an alias for :py:func:`array_replace_n`. |
| |
| |
| .. py:function:: list_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array with the specified size filled. |
| |
| If ``size`` is greater than the ``array`` length, the additional entries will be |
| filled with the given ``value``. This is an alias for :py:func:`array_resize`. |
| |
| |
| .. py:function:: list_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Returns a slice of the array. |
| |
| This is an alias for :py:func:`array_slice`. |
| |
| |
| .. py:function:: list_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr |
| |
| This is an alias for :py:func:`array_sort`. |
| |
| |
| .. py:function:: list_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts each element to its text representation. |
| |
| This is an alias for :py:func:`array_to_string`. |
| |
| |
| .. py:function:: list_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array of the elements in the union of array1 and array2. |
| |
| Duplicate rows will not be returned. |
| |
| This is an alias for :py:func:`array_union`. |
| |
| |
| .. py:function:: ln(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the natural logarithm (base e) of the argument. |
| |
| |
| .. py:function:: log(base: datafusion.expr.Expr, num: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the logarithm of a number for a particular ``base``. |
| |
| |
| .. py:function:: log10(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Base 10 logarithm of the argument. |
| |
| |
| .. py:function:: log2(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Base 2 logarithm of the argument. |
| |
| |
| .. py:function:: lower(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string to lowercase. |
| |
| |
| .. py:function:: lpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Add left padding to a string. |
| |
| Extends the string to length ``count`` by prepending the ``characters`` fill (a |
| space by default). If the string is already longer than ``count`` then it is |
| truncated (on the right). |
| |
| |
| .. py:function:: ltrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all characters, spaces by default, from the beginning of a string. |
| |
| |
| .. py:function:: make_array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array using the specified input expressions. |
| |
| |
| .. py:function:: make_date(year: datafusion.expr.Expr, month: datafusion.expr.Expr, day: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Make a date from year, month and day component parts. |
| |
| |
| .. py:function:: make_list(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an array using the specified input expressions. |
| |
| This is an alias for :py:func:`make_array`. |
| |
| |
| .. py:function:: max(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Aggregate function that returns the maximum value of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The value to find the maximum of |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: md5(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes an MD5 128-bit checksum for a string expression. |
| |
| |
| .. py:function:: mean(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the average (mean) value of the argument. |
| |
| This is an alias for :py:func:`avg`. |
| |
| |
| .. py:function:: median(expression: datafusion.expr.Expr, distinct: bool = False, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the median of a set of numbers. |
| |
| This aggregate function returns the median value of the expression for the given |
| aggregate function. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by`` and ``null_treatment``. |
| |
| :param expression: The value to compute the median of |
| :param distinct: If True, a single entry for each distinct value will be in the result |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: min(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Returns the minimum value of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The value to find the minimum of |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: named_struct(name_pairs: list[tuple[str, datafusion.expr.Expr]]) -> datafusion.expr.Expr |
| |
| Returns a struct with the given names and arguments pairs. |
| |
| |
| .. py:function:: nanvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``. |
| |
| |
| .. py:function:: now() -> datafusion.expr.Expr |
| |
| Returns the current timestamp in nanoseconds. |
| |
| This will use the same value for all instances of now() in same statement. |
| |
| |
| .. py:function:: nth_value(expression: datafusion.expr.Expr, n: int, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr |
| |
| Returns the n-th value in a group of values. |
| |
| This aggregate function will return the n-th value in the partition. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the option ``distinct``. |
| |
| :param expression: Argument to return the n-th value of |
| :param n: Index of value to return. Starts at 1. |
| :param filter: If provided, only compute against rows for which the filter is True |
| :param order_by: Set the ordering of the expression to evaluate |
| :param null_treatment: Assign whether to respect or ignore null values. |
| |
| |
| .. py:function:: ntile(groups: int, partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a n-tile window function. |
| |
| This window function orders the window frame into a given number of groups based on |
| the ordering criteria. It then returns which group the current row is assigned to. |
| Here is an example of a dataframe with a window ordered by descending ``points`` |
| and the associated n-tile function:: |
| |
| +--------+-------+ |
| | points | ntile | |
| +--------+-------+ |
| | 120 | 1 | |
| | 100 | 1 | |
| | 80 | 2 | |
| | 60 | 2 | |
| | 40 | 3 | |
| | 20 | 3 | |
| +--------+-------+ |
| |
| :param groups: Number of groups for the n-tile to be divided into. |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: nullif(expr1: datafusion.expr.Expr, expr2: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns NULL if expr1 equals expr2; otherwise it returns expr1. |
| |
| This can be used to perform the inverse operation of the COALESCE expression. |
| |
| |
| .. py:function:: nvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``. |
| |
| |
| .. py:function:: octet_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the number of bytes of a string. |
| |
| |
| .. py:function:: order_by(expr: datafusion.expr.Expr, ascending: bool = True, nulls_first: bool = True) -> datafusion.expr.SortExpr |
| |
| Creates a new sort expression. |
| |
| |
| .. py:function:: overlay(string: datafusion.expr.Expr, substring: datafusion.expr.Expr, start: datafusion.expr.Expr, length: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Replace a substring with a new substring. |
| |
| Replace the substring of string that starts at the ``start``'th character and |
| extends for ``length`` characters with new substring. |
| |
| |
| .. py:function:: percent_rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a percent_rank window function. |
| |
| This window function is similar to :py:func:`rank` except that the returned values |
| are the percentage from 0.0 to 1.0 from first to last. Here is an example of a |
| dataframe with a window ordered by descending ``points`` and the associated percent |
| rank:: |
| |
| +--------+--------------+ |
| | points | percent_rank | |
| +--------+--------------+ |
| | 100 | 0.0 | |
| | 100 | 0.0 | |
| | 50 | 0.666667 | |
| | 25 | 1.0 | |
| +--------+--------------+ |
| |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: pi() -> datafusion.expr.Expr |
| |
| Returns an approximate value of π. |
| |
| |
| .. py:function:: pow(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns ``base`` raised to the power of ``exponent``. |
| |
| This is an alias of :py:func:`power`. |
| |
| |
| .. py:function:: power(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns ``base`` raised to the power of ``exponent``. |
| |
| |
| .. py:function:: radians(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts the argument from degrees to radians. |
| |
| |
| .. py:function:: random() -> datafusion.expr.Expr |
| |
| Returns a random value in the range ``0.0 <= x < 1.0``. |
| |
| |
| .. py:function:: range(start: datafusion.expr.Expr, stop: datafusion.expr.Expr, step: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Create a list of values in the range between start and stop. |
| |
| |
| .. py:function:: rank(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a rank window function. |
| |
| Returns the rank based upon the window order. Consecutive equal values will receive |
| the same rank, but the next different value will not be consecutive but rather the |
| number of rows that precede it plus one. This is similar to Olympic medals. If two |
| people tie for gold, the next place is bronze. There would be no silver medal. Here |
| is an example of a dataframe with a window ordered by descending ``points`` and the |
| associated rank. |
| |
| You should set ``order_by`` to produce meaningful results:: |
| |
| +--------+------+ |
| | points | rank | |
| +--------+------+ |
| | 100 | 1 | |
| | 100 | 1 | |
| | 50 | 3 | |
| | 25 | 4 | |
| +--------+------+ |
| |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: regexp_count(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, start: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Returns the number of matches in a string. |
| |
| Optional start position (the first position is 1) to search for the regular |
| expression. |
| |
| |
| .. py:function:: regexp_like(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Find if any regular expression (regex) matches exist. |
| |
| Tests a string using a regular expression returning true if at least one match, |
| false otherwise. |
| |
| |
| .. py:function:: regexp_match(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Perform regular expression (regex) matching. |
| |
| Returns an array with each element containing the leftmost-first match of the |
| corresponding index in ``regex`` to string in ``string``. |
| |
| |
| .. py:function:: regexp_replace(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, replacement: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Replaces substring(s) matching a PCRE-like regular expression. |
| |
| The full list of supported features and syntax can be found at |
| <https://docs.rs/regex/latest/regex/#syntax> |
| |
| Supported flags with the addition of 'g' can be found at |
| <https://docs.rs/regex/latest/regex/#grouping-and-flags> |
| |
| |
| .. py:function:: regr_avgx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the average of the independent variable ``x``. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_avgy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the average of the dependent variable ``y``. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_count(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Counts the number of rows in which both expressions are not null. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_intercept(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the intercept from the linear regression. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_r2(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the R-squared value from linear regression. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_slope(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the slope from linear regression. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_sxx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sum of squares of the independent variable ``x``. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_sxy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sum of products of pairs of numbers. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: regr_syy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sum of squares of the dependent variable ``y``. |
| |
| This is a linear regression aggregate function. Only non-null pairs of the inputs |
| are evaluated. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param y: The linear regression dependent variable |
| :param x: The linear regression independent variable |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: repeat(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Repeats the ``string`` ``n`` times. |
| |
| |
| .. py:function:: replace(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``. |
| |
| |
| .. py:function:: reverse(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Reverse the string argument. |
| |
| |
| .. py:function:: right(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the last ``n`` characters in the ``string``. |
| |
| |
| .. py:function:: round(value: datafusion.expr.Expr, decimal_places: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Round the argument to the nearest integer. |
| |
| If the optional ``decimal_places`` is specified, round to the nearest number of |
| decimal places. You can specify a negative number of decimal places. For example |
| ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. |
| |
| |
| .. py:function:: row_number(partition_by: Optional[list[datafusion.expr.Expr] | datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Create a row number window function. |
| |
| Returns the row number of the window function. |
| |
| Here is an example of the ``row_number`` on a simple DataFrame:: |
| |
| +--------+------------+ |
| | points | row number | |
| +--------+------------+ |
| | 100 | 1 | |
| | 100 | 2 | |
| | 50 | 3 | |
| | 25 | 4 | |
| +--------+------------+ |
| |
| :param partition_by: Expressions to partition the window frame on. |
| :param order_by: Set ordering within the window frame. |
| |
| |
| .. py:function:: rpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Add right padding to a string. |
| |
| Extends the string to length ``count`` by appending ``characters`` (a space |
| by default). If the string is already longer than ``count`` then it is truncated. |
| |
| |
| .. py:function:: rtrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all characters, spaces by default, from the end of a string. |
| |
| |
| .. py:function:: sha224(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes the SHA-224 hash of a binary string. |
| |
| |
| .. py:function:: sha256(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes the SHA-256 hash of a binary string. |
| |
| |
| .. py:function:: sha384(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes the SHA-384 hash of a binary string. |
| |
| |
| .. py:function:: sha512(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Computes the SHA-512 hash of a binary string. |
| |
| |
| .. py:function:: signum(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the sign of the argument (-1, 0, +1). |
| |
| |
| .. py:function:: sin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the sine of the argument. |
| |
| |
| .. py:function:: sinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the hyperbolic sine of the argument. |
| |
| |
| .. py:function:: split_part(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, index: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Split a string and return one part. |
| |
| Splits a string based on a delimiter and picks out the desired field based |
| on the index. |
| |
| |
| .. py:function:: sqrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the square root of the argument. |
| |
| |
| .. py:function:: starts_with(string: datafusion.expr.Expr, prefix: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns true if string starts with prefix. |
| |
| |
| .. py:function:: stddev(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the standard deviation of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The value to compute the standard deviation of |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: stddev_pop(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the population standard deviation of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The value to compute the population standard deviation of |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: stddev_samp(arg: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample standard deviation of the argument. |
| |
| This is an alias for :py:func:`stddev`. |
| |
| |
| .. py:function:: string_agg(expression: datafusion.expr.Expr, delimiter: str, filter: Optional[datafusion.expr.Expr] = None, order_by: Optional[list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr] = None) -> datafusion.expr.Expr |
| |
| Concatenates the input strings. |
| |
| This aggregate function will concatenate input strings, ignoring null values, and |
| separating them with the specified delimiter. Non-string values will be converted to |
| their string equivalents. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``distinct`` and ``null_treatment``. |
| |
| :param expression: The values to concatenate |
| :param delimiter: Text to place between each value of expression |
| :param filter: If provided, only compute against rows for which the filter is True |
| :param order_by: Set the ordering of the expression to evaluate |
| |
| |
| .. py:function:: strpos(string: datafusion.expr.Expr, substring: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Finds the position from where the ``substring`` matches the ``string``. |
| |
| |
| .. py:function:: struct(*args: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns a struct with the given arguments. |
| |
| |
| .. py:function:: substr(string: datafusion.expr.Expr, position: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Substring from the ``position`` to the end. |
| |
| |
| .. py:function:: substr_index(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns an indexed substring. |
| |
| The return will be the ``string`` from before ``count`` occurrences of |
| ``delimiter``. |
| |
| |
| .. py:function:: substring(string: datafusion.expr.Expr, position: datafusion.expr.Expr, length: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Substring from the ``position`` with ``length`` characters. |
| |
| |
| .. py:function:: sum(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sum of a set of numbers. |
| |
| This aggregate function expects a numeric expression. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The values to sum |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: tan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the tangent of the argument. |
| |
| |
| .. py:function:: tanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Returns the hyperbolic tangent of the argument. |
| |
| |
| .. py:function:: to_hex(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts an integer to a hexadecimal string. |
| |
| |
| .. py:function:: to_timestamp(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a ``Timestamp`` in nanoseconds. |
| |
| For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package. |
| |
| `Documentation here. <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_ |
| |
| |
| .. py:function:: to_timestamp_micros(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a ``Timestamp`` in microseconds. |
| |
| See :py:func:`to_timestamp` for a description on how to use formatters. |
| |
| |
| .. py:function:: to_timestamp_millis(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a ``Timestamp`` in milliseconds. |
| |
| See :py:func:`to_timestamp` for a description on how to use formatters. |
| |
| |
| .. py:function:: to_timestamp_nanos(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a ``Timestamp`` in nanoseconds. |
| |
| See :py:func:`to_timestamp` for a description on how to use formatters. |
| |
| |
| .. py:function:: to_timestamp_seconds(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a ``Timestamp`` in seconds. |
| |
| See :py:func:`to_timestamp` for a description on how to use formatters. |
| |
| |
| .. py:function:: to_unixtime(string: datafusion.expr.Expr, *format_arguments: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string and optional formats to a Unixtime. |
| |
| |
| .. py:function:: translate(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Replaces the characters in ``from_val`` with the counterpart in ``to_val``. |
| |
| |
| .. py:function:: trim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Removes all characters, spaces by default, from both sides of a string. |
| |
| |
| .. py:function:: trunc(num: datafusion.expr.Expr, precision: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr |
| |
| Truncate the number toward zero with optional precision. |
| |
| |
| .. py:function:: upper(arg: datafusion.expr.Expr) -> datafusion.expr.Expr |
| |
| Converts a string to uppercase. |
| |
| |
| .. py:function:: uuid() -> datafusion.expr.Expr |
| |
| Returns uuid v4 as a string value. |
| |
| |
| .. py:function:: var(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample variance of the argument. |
| |
| This is an alias for :py:func:`var_samp`. |
| |
| |
| .. py:function:: var_pop(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the population variance of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The variable to compute the variance for |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: var_samp(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample variance of the argument. |
| |
| If using the builder functions described in :ref:`aggregation` this function ignores |
| the options ``order_by``, ``null_treatment``, and ``distinct``. |
| |
| :param expression: The variable to compute the variance for |
| :param filter: If provided, only compute against rows for which the filter is True |
| |
| |
| .. py:function:: var_sample(expression: datafusion.expr.Expr, filter: Optional[datafusion.expr.Expr] = None) -> datafusion.expr.Expr |
| |
| Computes the sample variance of the argument. |
| |
| This is an alias for :py:func:`var_samp`. |
| |
| |
| .. py:function:: when(when: datafusion.expr.Expr, then: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder |
| |
| Create a case expression that has no base expression. |
| |
| Create a :py:class:`~datafusion.expr.CaseBuilder` whose first case is the condition |
| ``when`` with result ``then``. See :py:class:`~datafusion.expr.CaseBuilder` for |
| detailed usage. |
| |
| |
| .. py:function:: window(name: str, args: list[datafusion.expr.Expr], partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.Expr | datafusion.expr.SortExpr] | datafusion.expr.Expr | datafusion.expr.SortExpr | None = None, window_frame: datafusion.expr.WindowFrame | None = None, ctx: datafusion.context.SessionContext | None = None) -> datafusion.expr.Expr |
| |
| Creates a new Window function expression. |
| |
| This interface will soon be deprecated. Instead of using this interface, |
| users should call the window functions directly. For example, to perform a |
| lag use:: |
| |
| df.select(functions.lag(col("a")).partition_by(col("b")).build()) |
| |
| |