| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| """ |
| @generated by mypy-protobuf. Do not edit manually! |
| isort:skip_file |
| |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| """ |
| import builtins |
| import collections.abc |
| import google.protobuf.any_pb2 |
| import google.protobuf.descriptor |
| import google.protobuf.internal.containers |
| import google.protobuf.internal.enum_type_wrapper |
| import google.protobuf.message |
| import pyspark.sql.connect.proto.catalog_pb2 |
| import pyspark.sql.connect.proto.expressions_pb2 |
| import pyspark.sql.connect.proto.types_pb2 |
| import sys |
| import typing |
| |
# On Python 3.10+ the stdlib `typing` module already provides everything this
# stub needs (Literal, TypeAlias, ...), so alias it as `typing_extensions`;
# older interpreters fall back to the real typing_extensions backport.
if sys.version_info >= (3, 10):
    import typing as typing_extensions
else:
    import typing_extensions

# File-level descriptor for this generated protobuf module.
DESCRIPTOR: google.protobuf.descriptor.FileDescriptor
| |
# NOTE(review): generated stub — change the .proto definition and regenerate
# rather than editing this class by hand.
class Relation(google.protobuf.message.Message):
    """The main [[Relation]] type. Fundamentally, a relation is a typed container
    that has exactly one explicit relation type set.

    When adding new relation types, they have to be registered here.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Protobuf tag-number constants, one per field of Relation.
    COMMON_FIELD_NUMBER: builtins.int
    READ_FIELD_NUMBER: builtins.int
    PROJECT_FIELD_NUMBER: builtins.int
    FILTER_FIELD_NUMBER: builtins.int
    JOIN_FIELD_NUMBER: builtins.int
    SET_OP_FIELD_NUMBER: builtins.int
    SORT_FIELD_NUMBER: builtins.int
    LIMIT_FIELD_NUMBER: builtins.int
    AGGREGATE_FIELD_NUMBER: builtins.int
    SQL_FIELD_NUMBER: builtins.int
    LOCAL_RELATION_FIELD_NUMBER: builtins.int
    SAMPLE_FIELD_NUMBER: builtins.int
    OFFSET_FIELD_NUMBER: builtins.int
    DEDUPLICATE_FIELD_NUMBER: builtins.int
    RANGE_FIELD_NUMBER: builtins.int
    SUBQUERY_ALIAS_FIELD_NUMBER: builtins.int
    REPARTITION_FIELD_NUMBER: builtins.int
    TO_DF_FIELD_NUMBER: builtins.int
    WITH_COLUMNS_RENAMED_FIELD_NUMBER: builtins.int
    SHOW_STRING_FIELD_NUMBER: builtins.int
    DROP_FIELD_NUMBER: builtins.int
    TAIL_FIELD_NUMBER: builtins.int
    WITH_COLUMNS_FIELD_NUMBER: builtins.int
    HINT_FIELD_NUMBER: builtins.int
    UNPIVOT_FIELD_NUMBER: builtins.int
    TO_SCHEMA_FIELD_NUMBER: builtins.int
    REPARTITION_BY_EXPRESSION_FIELD_NUMBER: builtins.int
    MAP_PARTITIONS_FIELD_NUMBER: builtins.int
    COLLECT_METRICS_FIELD_NUMBER: builtins.int
    PARSE_FIELD_NUMBER: builtins.int
    GROUP_MAP_FIELD_NUMBER: builtins.int
    CO_GROUP_MAP_FIELD_NUMBER: builtins.int
    WITH_WATERMARK_FIELD_NUMBER: builtins.int
    APPLY_IN_PANDAS_WITH_STATE_FIELD_NUMBER: builtins.int
    HTML_STRING_FIELD_NUMBER: builtins.int
    CACHED_LOCAL_RELATION_FIELD_NUMBER: builtins.int
    CACHED_REMOTE_RELATION_FIELD_NUMBER: builtins.int
    COMMON_INLINE_USER_DEFINED_TABLE_FUNCTION_FIELD_NUMBER: builtins.int
    AS_OF_JOIN_FIELD_NUMBER: builtins.int
    COMMON_INLINE_USER_DEFINED_DATA_SOURCE_FIELD_NUMBER: builtins.int
    WITH_RELATIONS_FIELD_NUMBER: builtins.int
    FILL_NA_FIELD_NUMBER: builtins.int
    DROP_NA_FIELD_NUMBER: builtins.int
    REPLACE_FIELD_NUMBER: builtins.int
    SUMMARY_FIELD_NUMBER: builtins.int
    CROSSTAB_FIELD_NUMBER: builtins.int
    DESCRIBE_FIELD_NUMBER: builtins.int
    COV_FIELD_NUMBER: builtins.int
    CORR_FIELD_NUMBER: builtins.int
    APPROX_QUANTILE_FIELD_NUMBER: builtins.int
    FREQ_ITEMS_FIELD_NUMBER: builtins.int
    SAMPLE_BY_FIELD_NUMBER: builtins.int
    CATALOG_FIELD_NUMBER: builtins.int
    EXTENSION_FIELD_NUMBER: builtins.int
    UNKNOWN_FIELD_NUMBER: builtins.int
    # Field accessors. Every field below except `common` belongs to the
    # `rel_type` oneof (see WhichOneof), so at most one of them is set.
    @property
    def common(self) -> global___RelationCommon: ...
    @property
    def read(self) -> global___Read: ...
    @property
    def project(self) -> global___Project: ...
    @property
    def filter(self) -> global___Filter: ...
    @property
    def join(self) -> global___Join: ...
    @property
    def set_op(self) -> global___SetOperation: ...
    @property
    def sort(self) -> global___Sort: ...
    @property
    def limit(self) -> global___Limit: ...
    @property
    def aggregate(self) -> global___Aggregate: ...
    @property
    def sql(self) -> global___SQL: ...
    @property
    def local_relation(self) -> global___LocalRelation: ...
    @property
    def sample(self) -> global___Sample: ...
    @property
    def offset(self) -> global___Offset: ...
    @property
    def deduplicate(self) -> global___Deduplicate: ...
    @property
    def range(self) -> global___Range: ...
    @property
    def subquery_alias(self) -> global___SubqueryAlias: ...
    @property
    def repartition(self) -> global___Repartition: ...
    @property
    def to_df(self) -> global___ToDF: ...
    @property
    def with_columns_renamed(self) -> global___WithColumnsRenamed: ...
    @property
    def show_string(self) -> global___ShowString: ...
    @property
    def drop(self) -> global___Drop: ...
    @property
    def tail(self) -> global___Tail: ...
    @property
    def with_columns(self) -> global___WithColumns: ...
    @property
    def hint(self) -> global___Hint: ...
    @property
    def unpivot(self) -> global___Unpivot: ...
    @property
    def to_schema(self) -> global___ToSchema: ...
    @property
    def repartition_by_expression(self) -> global___RepartitionByExpression: ...
    @property
    def map_partitions(self) -> global___MapPartitions: ...
    @property
    def collect_metrics(self) -> global___CollectMetrics: ...
    @property
    def parse(self) -> global___Parse: ...
    @property
    def group_map(self) -> global___GroupMap: ...
    @property
    def co_group_map(self) -> global___CoGroupMap: ...
    @property
    def with_watermark(self) -> global___WithWatermark: ...
    @property
    def apply_in_pandas_with_state(self) -> global___ApplyInPandasWithState: ...
    @property
    def html_string(self) -> global___HtmlString: ...
    @property
    def cached_local_relation(self) -> global___CachedLocalRelation: ...
    @property
    def cached_remote_relation(self) -> global___CachedRemoteRelation: ...
    @property
    def common_inline_user_defined_table_function(
        self,
    ) -> global___CommonInlineUserDefinedTableFunction: ...
    @property
    def as_of_join(self) -> global___AsOfJoin: ...
    @property
    def common_inline_user_defined_data_source(
        self,
    ) -> global___CommonInlineUserDefinedDataSource: ...
    @property
    def with_relations(self) -> global___WithRelations: ...
    @property
    def fill_na(self) -> global___NAFill:
        """NA functions"""
    @property
    def drop_na(self) -> global___NADrop: ...
    @property
    def replace(self) -> global___NAReplace: ...
    @property
    def summary(self) -> global___StatSummary:
        """stat functions"""
    @property
    def crosstab(self) -> global___StatCrosstab: ...
    @property
    def describe(self) -> global___StatDescribe: ...
    @property
    def cov(self) -> global___StatCov: ...
    @property
    def corr(self) -> global___StatCorr: ...
    @property
    def approx_quantile(self) -> global___StatApproxQuantile: ...
    @property
    def freq_items(self) -> global___StatFreqItems: ...
    @property
    def sample_by(self) -> global___StatSampleBy: ...
    @property
    def catalog(self) -> pyspark.sql.connect.proto.catalog_pb2.Catalog:
        """Catalog API (experimental / unstable)"""
    @property
    def extension(self) -> google.protobuf.any_pb2.Any:
        """This field is used to mark extensions to the protocol. When plugins generate arbitrary
        relations they can add them here. During the planning the correct resolution is done.
        """
    @property
    def unknown(self) -> global___Unknown: ...
    # All constructor arguments are keyword-only; omitted fields stay unset.
    def __init__(
        self,
        *,
        common: global___RelationCommon | None = ...,
        read: global___Read | None = ...,
        project: global___Project | None = ...,
        filter: global___Filter | None = ...,
        join: global___Join | None = ...,
        set_op: global___SetOperation | None = ...,
        sort: global___Sort | None = ...,
        limit: global___Limit | None = ...,
        aggregate: global___Aggregate | None = ...,
        sql: global___SQL | None = ...,
        local_relation: global___LocalRelation | None = ...,
        sample: global___Sample | None = ...,
        offset: global___Offset | None = ...,
        deduplicate: global___Deduplicate | None = ...,
        range: global___Range | None = ...,
        subquery_alias: global___SubqueryAlias | None = ...,
        repartition: global___Repartition | None = ...,
        to_df: global___ToDF | None = ...,
        with_columns_renamed: global___WithColumnsRenamed | None = ...,
        show_string: global___ShowString | None = ...,
        drop: global___Drop | None = ...,
        tail: global___Tail | None = ...,
        with_columns: global___WithColumns | None = ...,
        hint: global___Hint | None = ...,
        unpivot: global___Unpivot | None = ...,
        to_schema: global___ToSchema | None = ...,
        repartition_by_expression: global___RepartitionByExpression | None = ...,
        map_partitions: global___MapPartitions | None = ...,
        collect_metrics: global___CollectMetrics | None = ...,
        parse: global___Parse | None = ...,
        group_map: global___GroupMap | None = ...,
        co_group_map: global___CoGroupMap | None = ...,
        with_watermark: global___WithWatermark | None = ...,
        apply_in_pandas_with_state: global___ApplyInPandasWithState | None = ...,
        html_string: global___HtmlString | None = ...,
        cached_local_relation: global___CachedLocalRelation | None = ...,
        cached_remote_relation: global___CachedRemoteRelation | None = ...,
        common_inline_user_defined_table_function: global___CommonInlineUserDefinedTableFunction
        | None = ...,
        as_of_join: global___AsOfJoin | None = ...,
        common_inline_user_defined_data_source: global___CommonInlineUserDefinedDataSource
        | None = ...,
        with_relations: global___WithRelations | None = ...,
        fill_na: global___NAFill | None = ...,
        drop_na: global___NADrop | None = ...,
        replace: global___NAReplace | None = ...,
        summary: global___StatSummary | None = ...,
        crosstab: global___StatCrosstab | None = ...,
        describe: global___StatDescribe | None = ...,
        cov: global___StatCov | None = ...,
        corr: global___StatCorr | None = ...,
        approx_quantile: global___StatApproxQuantile | None = ...,
        freq_items: global___StatFreqItems | None = ...,
        sample_by: global___StatSampleBy | None = ...,
        catalog: pyspark.sql.connect.proto.catalog_pb2.Catalog | None = ...,
        extension: google.protobuf.any_pb2.Any | None = ...,
        unknown: global___Unknown | None = ...,
    ) -> None: ...
    # Presence check; accepts field names (str or bytes) plus the oneof
    # group name "rel_type".
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "aggregate",
            b"aggregate",
            "apply_in_pandas_with_state",
            b"apply_in_pandas_with_state",
            "approx_quantile",
            b"approx_quantile",
            "as_of_join",
            b"as_of_join",
            "cached_local_relation",
            b"cached_local_relation",
            "cached_remote_relation",
            b"cached_remote_relation",
            "catalog",
            b"catalog",
            "co_group_map",
            b"co_group_map",
            "collect_metrics",
            b"collect_metrics",
            "common",
            b"common",
            "common_inline_user_defined_data_source",
            b"common_inline_user_defined_data_source",
            "common_inline_user_defined_table_function",
            b"common_inline_user_defined_table_function",
            "corr",
            b"corr",
            "cov",
            b"cov",
            "crosstab",
            b"crosstab",
            "deduplicate",
            b"deduplicate",
            "describe",
            b"describe",
            "drop",
            b"drop",
            "drop_na",
            b"drop_na",
            "extension",
            b"extension",
            "fill_na",
            b"fill_na",
            "filter",
            b"filter",
            "freq_items",
            b"freq_items",
            "group_map",
            b"group_map",
            "hint",
            b"hint",
            "html_string",
            b"html_string",
            "join",
            b"join",
            "limit",
            b"limit",
            "local_relation",
            b"local_relation",
            "map_partitions",
            b"map_partitions",
            "offset",
            b"offset",
            "parse",
            b"parse",
            "project",
            b"project",
            "range",
            b"range",
            "read",
            b"read",
            "rel_type",
            b"rel_type",
            "repartition",
            b"repartition",
            "repartition_by_expression",
            b"repartition_by_expression",
            "replace",
            b"replace",
            "sample",
            b"sample",
            "sample_by",
            b"sample_by",
            "set_op",
            b"set_op",
            "show_string",
            b"show_string",
            "sort",
            b"sort",
            "sql",
            b"sql",
            "subquery_alias",
            b"subquery_alias",
            "summary",
            b"summary",
            "tail",
            b"tail",
            "to_df",
            b"to_df",
            "to_schema",
            b"to_schema",
            "unknown",
            b"unknown",
            "unpivot",
            b"unpivot",
            "with_columns",
            b"with_columns",
            "with_columns_renamed",
            b"with_columns_renamed",
            "with_relations",
            b"with_relations",
            "with_watermark",
            b"with_watermark",
        ],
    ) -> builtins.bool: ...
    # Clears a field (or the whole "rel_type" oneof) back to unset.
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "aggregate",
            b"aggregate",
            "apply_in_pandas_with_state",
            b"apply_in_pandas_with_state",
            "approx_quantile",
            b"approx_quantile",
            "as_of_join",
            b"as_of_join",
            "cached_local_relation",
            b"cached_local_relation",
            "cached_remote_relation",
            b"cached_remote_relation",
            "catalog",
            b"catalog",
            "co_group_map",
            b"co_group_map",
            "collect_metrics",
            b"collect_metrics",
            "common",
            b"common",
            "common_inline_user_defined_data_source",
            b"common_inline_user_defined_data_source",
            "common_inline_user_defined_table_function",
            b"common_inline_user_defined_table_function",
            "corr",
            b"corr",
            "cov",
            b"cov",
            "crosstab",
            b"crosstab",
            "deduplicate",
            b"deduplicate",
            "describe",
            b"describe",
            "drop",
            b"drop",
            "drop_na",
            b"drop_na",
            "extension",
            b"extension",
            "fill_na",
            b"fill_na",
            "filter",
            b"filter",
            "freq_items",
            b"freq_items",
            "group_map",
            b"group_map",
            "hint",
            b"hint",
            "html_string",
            b"html_string",
            "join",
            b"join",
            "limit",
            b"limit",
            "local_relation",
            b"local_relation",
            "map_partitions",
            b"map_partitions",
            "offset",
            b"offset",
            "parse",
            b"parse",
            "project",
            b"project",
            "range",
            b"range",
            "read",
            b"read",
            "rel_type",
            b"rel_type",
            "repartition",
            b"repartition",
            "repartition_by_expression",
            b"repartition_by_expression",
            "replace",
            b"replace",
            "sample",
            b"sample",
            "sample_by",
            b"sample_by",
            "set_op",
            b"set_op",
            "show_string",
            b"show_string",
            "sort",
            b"sort",
            "sql",
            b"sql",
            "subquery_alias",
            b"subquery_alias",
            "summary",
            b"summary",
            "tail",
            b"tail",
            "to_df",
            b"to_df",
            "to_schema",
            b"to_schema",
            "unknown",
            b"unknown",
            "unpivot",
            b"unpivot",
            "with_columns",
            b"with_columns",
            "with_columns_renamed",
            b"with_columns_renamed",
            "with_relations",
            b"with_relations",
            "with_watermark",
            b"with_watermark",
        ],
    ) -> None: ...
    # Returns the name of the rel_type member currently set, or None when
    # no relation type has been set.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["rel_type", b"rel_type"]
    ) -> (
        typing_extensions.Literal[
            "read",
            "project",
            "filter",
            "join",
            "set_op",
            "sort",
            "limit",
            "aggregate",
            "sql",
            "local_relation",
            "sample",
            "offset",
            "deduplicate",
            "range",
            "subquery_alias",
            "repartition",
            "to_df",
            "with_columns_renamed",
            "show_string",
            "drop",
            "tail",
            "with_columns",
            "hint",
            "unpivot",
            "to_schema",
            "repartition_by_expression",
            "map_partitions",
            "collect_metrics",
            "parse",
            "group_map",
            "co_group_map",
            "with_watermark",
            "apply_in_pandas_with_state",
            "html_string",
            "cached_local_relation",
            "cached_remote_relation",
            "common_inline_user_defined_table_function",
            "as_of_join",
            "common_inline_user_defined_data_source",
            "with_relations",
            "fill_na",
            "drop_na",
            "replace",
            "summary",
            "crosstab",
            "describe",
            "cov",
            "corr",
            "approx_quantile",
            "freq_items",
            "sample_by",
            "catalog",
            "extension",
            "unknown",
        ]
        | None
    ): ...

global___Relation = Relation
| |
class Unknown(google.protobuf.message.Message):
    """Used for testing purposes only."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Unknown declares no fields, so the constructor accepts no arguments.
    def __init__(
        self,
    ) -> None: ...

global___Unknown = Unknown
| |
class RelationCommon(google.protobuf.message.Message):
    """Common metadata of all relations."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    SOURCE_INFO_FIELD_NUMBER: builtins.int
    PLAN_ID_FIELD_NUMBER: builtins.int
    source_info: builtins.str
    """(Required) Shared relation metadata."""
    plan_id: builtins.int
    """(Optional) A per-client globally unique id for a given connect plan."""
    def __init__(
        self,
        *,
        source_info: builtins.str = ...,
        plan_id: builtins.int | None = ...,
    ) -> None: ...
    # `plan_id` is an optional scalar, so presence is tracked via the
    # synthetic "_plan_id" oneof ("_plan_id" / "plan_id" are both accepted).
    def HasField(
        self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_plan_id", b"_plan_id", "plan_id", b"plan_id", "source_info", b"source_info"
        ],
    ) -> None: ...
    # Returns "plan_id" when the optional field is set, otherwise None.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"]
    ) -> typing_extensions.Literal["plan_id"] | None: ...

global___RelationCommon = RelationCommon
| |
class SQL(google.protobuf.message.Message):
    """Relation that uses a SQL query to generate the output."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Synthetic map-entry message for the `args` map field
    # (str -> Expression.Literal).
    class ArgsEntry(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        KEY_FIELD_NUMBER: builtins.int
        VALUE_FIELD_NUMBER: builtins.int
        key: builtins.str
        @property
        def value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: ...
        def __init__(
            self,
            *,
            key: builtins.str = ...,
            value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ...,
        ) -> None: ...
        def HasField(
            self, field_name: typing_extensions.Literal["value", b"value"]
        ) -> builtins.bool: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
        ) -> None: ...

    # Synthetic map-entry message for the `named_arguments` map field
    # (str -> Expression).
    class NamedArgumentsEntry(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        KEY_FIELD_NUMBER: builtins.int
        VALUE_FIELD_NUMBER: builtins.int
        key: builtins.str
        @property
        def value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: ...
        def __init__(
            self,
            *,
            key: builtins.str = ...,
            value: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        ) -> None: ...
        def HasField(
            self, field_name: typing_extensions.Literal["value", b"value"]
        ) -> builtins.bool: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
        ) -> None: ...

    QUERY_FIELD_NUMBER: builtins.int
    ARGS_FIELD_NUMBER: builtins.int
    POS_ARGS_FIELD_NUMBER: builtins.int
    NAMED_ARGUMENTS_FIELD_NUMBER: builtins.int
    POS_ARGUMENTS_FIELD_NUMBER: builtins.int
    query: builtins.str
    """(Required) The SQL query."""
    @property
    def args(
        self,
    ) -> google.protobuf.internal.containers.MessageMap[
        builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
    ]:
        """(Optional) A map of parameter names to literal expressions."""
    @property
    def pos_args(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
    ]:
        """(Optional) A sequence of literal expressions for positional parameters in the SQL query text."""
    @property
    def named_arguments(
        self,
    ) -> google.protobuf.internal.containers.MessageMap[
        builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) A map of parameter names to expressions.
        It cannot coexist with `pos_arguments`.
        """
    @property
    def pos_arguments(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) A sequence of expressions for positional parameters in the SQL query text.
        It cannot coexist with `named_arguments`.
        """
    def __init__(
        self,
        *,
        query: builtins.str = ...,
        args: collections.abc.Mapping[
            builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
        ]
        | None = ...,
        pos_args: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
        ]
        | None = ...,
        named_arguments: collections.abc.Mapping[
            builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        pos_arguments: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
    ) -> None: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "args",
            b"args",
            "named_arguments",
            b"named_arguments",
            "pos_args",
            b"pos_args",
            "pos_arguments",
            b"pos_arguments",
            "query",
            b"query",
        ],
    ) -> None: ...

global___SQL = SQL
| |
class WithRelations(google.protobuf.message.Message):
    """Relation of type [[WithRelations]].

    This relation contains a root plan, and one or more references that are used by the root plan.
    There are two ways of referencing a relation, by name (through a subquery alias), or by plan_id
    (using RelationCommon.plan_id).

    This relation can be used to implement CTEs, describe DAGs, or to reduce tree depth.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    ROOT_FIELD_NUMBER: builtins.int
    REFERENCES_FIELD_NUMBER: builtins.int
    @property
    def root(self) -> global___Relation:
        """(Required) Plan at the root of the query tree. This plan is expected to contain one or more
        references. Those references get expanded later on by the engine.
        """
    @property
    def references(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Relation]:
        """(Required) Plans referenced by the root plan. Relations in this list are also allowed to
        contain references to other relations in this list, as long they do not form cycles.
        """
    def __init__(
        self,
        *,
        root: global___Relation | None = ...,
        references: collections.abc.Iterable[global___Relation] | None = ...,
    ) -> None: ...
    # Only the singular message field `root` supports HasField; the repeated
    # `references` field does not carry presence.
    def HasField(self, field_name: typing_extensions.Literal["root", b"root"]) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["references", b"references", "root", b"root"]
    ) -> None: ...

global___WithRelations = WithRelations
| |
class Read(google.protobuf.message.Message):
    """Relation that reads from a file / table or other data source. Does not have additional
    inputs.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Read of a catalog table identified by name; one arm of the `read_type`
    # oneof (see Read.WhichOneof).
    class NamedTable(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        # Synthetic map-entry message for the `options` map (str -> str).
        class OptionsEntry(google.protobuf.message.Message):
            DESCRIPTOR: google.protobuf.descriptor.Descriptor

            KEY_FIELD_NUMBER: builtins.int
            VALUE_FIELD_NUMBER: builtins.int
            key: builtins.str
            value: builtins.str
            def __init__(
                self,
                *,
                key: builtins.str = ...,
                value: builtins.str = ...,
            ) -> None: ...
            def ClearField(
                self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
            ) -> None: ...

        UNPARSED_IDENTIFIER_FIELD_NUMBER: builtins.int
        OPTIONS_FIELD_NUMBER: builtins.int
        unparsed_identifier: builtins.str
        """(Required) Unparsed identifier for the table."""
        @property
        def options(
            self,
        ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
            """Options for the named table. The map key is case insensitive."""
        def __init__(
            self,
            *,
            unparsed_identifier: builtins.str = ...,
            options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
        ) -> None: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal[
                "options", b"options", "unparsed_identifier", b"unparsed_identifier"
            ],
        ) -> None: ...

    # Read via a data source (format + options + paths); the other arm of the
    # `read_type` oneof.
    class DataSource(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        # Synthetic map-entry message for the `options` map (str -> str).
        class OptionsEntry(google.protobuf.message.Message):
            DESCRIPTOR: google.protobuf.descriptor.Descriptor

            KEY_FIELD_NUMBER: builtins.int
            VALUE_FIELD_NUMBER: builtins.int
            key: builtins.str
            value: builtins.str
            def __init__(
                self,
                *,
                key: builtins.str = ...,
                value: builtins.str = ...,
            ) -> None: ...
            def ClearField(
                self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
            ) -> None: ...

        FORMAT_FIELD_NUMBER: builtins.int
        SCHEMA_FIELD_NUMBER: builtins.int
        OPTIONS_FIELD_NUMBER: builtins.int
        PATHS_FIELD_NUMBER: builtins.int
        PREDICATES_FIELD_NUMBER: builtins.int
        format: builtins.str
        """(Optional) Supported formats include: parquet, orc, text, json, parquet, csv, avro.

        If not set, the value from SQL conf 'spark.sql.sources.default' will be used.
        """
        schema: builtins.str
        """(Optional) If not set, Spark will infer the schema.

        This schema string should be either DDL-formatted or JSON-formatted.
        """
        @property
        def options(
            self,
        ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
            """Options for the data source. The context of this map varies based on the
            data source format. This options could be empty for valid data source format.
            The map key is case insensitive.
            """
        @property
        def paths(
            self,
        ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
            """(Optional) A list of path for file-system backed data sources."""
        @property
        def predicates(
            self,
        ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
            """(Optional) Condition in the where clause for each partition.

            This is only supported by the JDBC data source.
            """
        def __init__(
            self,
            *,
            format: builtins.str | None = ...,
            schema: builtins.str | None = ...,
            options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
            paths: collections.abc.Iterable[builtins.str] | None = ...,
            predicates: collections.abc.Iterable[builtins.str] | None = ...,
        ) -> None: ...
        # `format` and `schema` are optional scalars tracked via the synthetic
        # "_format" / "_schema" oneofs.
        def HasField(
            self,
            field_name: typing_extensions.Literal[
                "_format",
                b"_format",
                "_schema",
                b"_schema",
                "format",
                b"format",
                "schema",
                b"schema",
            ],
        ) -> builtins.bool: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal[
                "_format",
                b"_format",
                "_schema",
                b"_schema",
                "format",
                b"format",
                "options",
                b"options",
                "paths",
                b"paths",
                "predicates",
                b"predicates",
                "schema",
                b"schema",
            ],
        ) -> None: ...
        @typing.overload
        def WhichOneof(
            self, oneof_group: typing_extensions.Literal["_format", b"_format"]
        ) -> typing_extensions.Literal["format"] | None: ...
        @typing.overload
        def WhichOneof(
            self, oneof_group: typing_extensions.Literal["_schema", b"_schema"]
        ) -> typing_extensions.Literal["schema"] | None: ...

    NAMED_TABLE_FIELD_NUMBER: builtins.int
    DATA_SOURCE_FIELD_NUMBER: builtins.int
    IS_STREAMING_FIELD_NUMBER: builtins.int
    @property
    def named_table(self) -> global___Read.NamedTable: ...
    @property
    def data_source(self) -> global___Read.DataSource: ...
    is_streaming: builtins.bool
    """(Optional) Indicates if this is a streaming read."""
    def __init__(
        self,
        *,
        named_table: global___Read.NamedTable | None = ...,
        data_source: global___Read.DataSource | None = ...,
        is_streaming: builtins.bool = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "data_source", b"data_source", "named_table", b"named_table", "read_type", b"read_type"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "data_source",
            b"data_source",
            "is_streaming",
            b"is_streaming",
            "named_table",
            b"named_table",
            "read_type",
            b"read_type",
        ],
    ) -> None: ...
    # Tells which arm of the `read_type` oneof is set, if any.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["read_type", b"read_type"]
    ) -> typing_extensions.Literal["named_table", "data_source"] | None: ...

global___Read = Read
| |
class Project(google.protobuf.message.Message):
    """Projection of a bag of expressions for a given input relation.

    The input relation must be specified.
    The projected expression can be an arbitrary expression.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    EXPRESSIONS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Optional) Input relation is optional for Project.

        For example, `SELECT ABS(-1)` is valid plan without an input plan.
        """
    @property
    def expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) A Project requires at least one expression."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        expressions: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
    ) -> None: ...
    # Presence check is only meaningful for the singular `input` message field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal["expressions", b"expressions", "input", b"input"],
    ) -> None: ...

global___Project = Project
| |
class Filter(google.protobuf.message.Message):
    """Relation that applies a boolean expression `condition` on each row of `input` to produce
    the output result.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    CONDITION_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Filter."""
    @property
    def condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Required) A Filter must have a condition expression."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["condition", b"condition", "input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["condition", b"condition", "input", b"input"]
    ) -> None: ...

global___Filter = Filter
| |
class Join(google.protobuf.message.Message):
    """Relation of type [[Join]].

    `left` and `right` must be present.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class _JoinType:
        ValueType = typing.NewType("ValueType", builtins.int)
        V: typing_extensions.TypeAlias = ValueType

    class _JoinTypeEnumTypeWrapper(
        google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Join._JoinType.ValueType],
        builtins.type,
    ):  # noqa: F821
        DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
        JOIN_TYPE_UNSPECIFIED: Join._JoinType.ValueType  # 0
        JOIN_TYPE_INNER: Join._JoinType.ValueType  # 1
        JOIN_TYPE_FULL_OUTER: Join._JoinType.ValueType  # 2
        JOIN_TYPE_LEFT_OUTER: Join._JoinType.ValueType  # 3
        JOIN_TYPE_RIGHT_OUTER: Join._JoinType.ValueType  # 4
        JOIN_TYPE_LEFT_ANTI: Join._JoinType.ValueType  # 5
        JOIN_TYPE_LEFT_SEMI: Join._JoinType.ValueType  # 6
        JOIN_TYPE_CROSS: Join._JoinType.ValueType  # 7

    class JoinType(_JoinType, metaclass=_JoinTypeEnumTypeWrapper): ...
    JOIN_TYPE_UNSPECIFIED: Join.JoinType.ValueType  # 0
    JOIN_TYPE_INNER: Join.JoinType.ValueType  # 1
    JOIN_TYPE_FULL_OUTER: Join.JoinType.ValueType  # 2
    JOIN_TYPE_LEFT_OUTER: Join.JoinType.ValueType  # 3
    JOIN_TYPE_RIGHT_OUTER: Join.JoinType.ValueType  # 4
    JOIN_TYPE_LEFT_ANTI: Join.JoinType.ValueType  # 5
    JOIN_TYPE_LEFT_SEMI: Join.JoinType.ValueType  # 6
    JOIN_TYPE_CROSS: Join.JoinType.ValueType  # 7

    class JoinDataType(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        IS_LEFT_STRUCT_FIELD_NUMBER: builtins.int
        IS_RIGHT_STRUCT_FIELD_NUMBER: builtins.int
        is_left_struct: builtins.bool
        """If the left data type is a struct."""
        is_right_struct: builtins.bool
        """If the right data type is a struct."""
        def __init__(
            self,
            *,
            is_left_struct: builtins.bool = ...,
            is_right_struct: builtins.bool = ...,
        ) -> None: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal[
                "is_left_struct", b"is_left_struct", "is_right_struct", b"is_right_struct"
            ],
        ) -> None: ...

    LEFT_FIELD_NUMBER: builtins.int
    RIGHT_FIELD_NUMBER: builtins.int
    JOIN_CONDITION_FIELD_NUMBER: builtins.int
    JOIN_TYPE_FIELD_NUMBER: builtins.int
    USING_COLUMNS_FIELD_NUMBER: builtins.int
    JOIN_DATA_TYPE_FIELD_NUMBER: builtins.int
    @property
    def left(self) -> global___Relation:
        """(Required) Left input relation for a Join."""
    @property
    def right(self) -> global___Relation:
        """(Required) Right input relation for a Join."""
    @property
    def join_condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Optional) The join condition. Could be unset when `using_columns` is utilized.

        This field does not co-exist with using_columns.
        """
    join_type: global___Join.JoinType.ValueType
    """(Required) The join type."""
    @property
    def using_columns(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """Optional. using_columns provides a list of columns that should be present on both sides
        of the join inputs that this Join will join on. For example A JOIN B USING col_name is
        equivalent to A JOIN B on A.col_name = B.col_name.

        This field does not co-exist with join_condition.
        """
    @property
    def join_data_type(self) -> global___Join.JoinDataType:
        """(Optional) Only used by joinWith. Set the left and right join data types."""
    def __init__(
        self,
        *,
        left: global___Relation | None = ...,
        right: global___Relation | None = ...,
        join_condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        join_type: global___Join.JoinType.ValueType = ...,
        using_columns: collections.abc.Iterable[builtins.str] | None = ...,
        join_data_type: global___Join.JoinDataType | None = ...,
    ) -> None: ...
    # "_join_data_type" below is presumably the synthetic oneof that protoc
    # generates for a proto3 `optional` field — TODO confirm against the .proto.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_join_data_type",
            b"_join_data_type",
            "join_condition",
            b"join_condition",
            "join_data_type",
            b"join_data_type",
            "left",
            b"left",
            "right",
            b"right",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_join_data_type",
            b"_join_data_type",
            "join_condition",
            b"join_condition",
            "join_data_type",
            b"join_data_type",
            "join_type",
            b"join_type",
            "left",
            b"left",
            "right",
            b"right",
            "using_columns",
            b"using_columns",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_join_data_type", b"_join_data_type"]
    ) -> typing_extensions.Literal["join_data_type"] | None: ...

global___Join = Join
| |
class SetOperation(google.protobuf.message.Message):
    """Relation of type [[SetOperation]]"""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class _SetOpType:
        ValueType = typing.NewType("ValueType", builtins.int)
        V: typing_extensions.TypeAlias = ValueType

    class _SetOpTypeEnumTypeWrapper(
        google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[
            SetOperation._SetOpType.ValueType
        ],
        builtins.type,
    ):  # noqa: F821
        DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
        SET_OP_TYPE_UNSPECIFIED: SetOperation._SetOpType.ValueType  # 0
        SET_OP_TYPE_INTERSECT: SetOperation._SetOpType.ValueType  # 1
        SET_OP_TYPE_UNION: SetOperation._SetOpType.ValueType  # 2
        SET_OP_TYPE_EXCEPT: SetOperation._SetOpType.ValueType  # 3

    class SetOpType(_SetOpType, metaclass=_SetOpTypeEnumTypeWrapper): ...
    SET_OP_TYPE_UNSPECIFIED: SetOperation.SetOpType.ValueType  # 0
    SET_OP_TYPE_INTERSECT: SetOperation.SetOpType.ValueType  # 1
    SET_OP_TYPE_UNION: SetOperation.SetOpType.ValueType  # 2
    SET_OP_TYPE_EXCEPT: SetOperation.SetOpType.ValueType  # 3

    LEFT_INPUT_FIELD_NUMBER: builtins.int
    RIGHT_INPUT_FIELD_NUMBER: builtins.int
    SET_OP_TYPE_FIELD_NUMBER: builtins.int
    IS_ALL_FIELD_NUMBER: builtins.int
    BY_NAME_FIELD_NUMBER: builtins.int
    ALLOW_MISSING_COLUMNS_FIELD_NUMBER: builtins.int
    @property
    def left_input(self) -> global___Relation:
        """(Required) Left input relation for a Set operation."""
    @property
    def right_input(self) -> global___Relation:
        """(Required) Right input relation for a Set operation."""
    set_op_type: global___SetOperation.SetOpType.ValueType
    """(Required) The Set operation type."""
    is_all: builtins.bool
    """(Optional) Whether to remove duplicate rows.

    True to preserve all results.
    False to remove duplicate rows.
    """
    by_name: builtins.bool
    """(Optional) Whether to perform the Set operation based on name resolution.

    Only UNION supports this option.
    """
    allow_missing_columns: builtins.bool
    """(Optional) Whether to perform the Set operation while allowing missing columns.

    Only UNION supports this option.
    """
    def __init__(
        self,
        *,
        left_input: global___Relation | None = ...,
        right_input: global___Relation | None = ...,
        set_op_type: global___SetOperation.SetOpType.ValueType = ...,
        is_all: builtins.bool | None = ...,
        by_name: builtins.bool | None = ...,
        allow_missing_columns: builtins.bool | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_allow_missing_columns",
            b"_allow_missing_columns",
            "_by_name",
            b"_by_name",
            "_is_all",
            b"_is_all",
            "allow_missing_columns",
            b"allow_missing_columns",
            "by_name",
            b"by_name",
            "is_all",
            b"is_all",
            "left_input",
            b"left_input",
            "right_input",
            b"right_input",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_allow_missing_columns",
            b"_allow_missing_columns",
            "_by_name",
            b"_by_name",
            "_is_all",
            b"_is_all",
            "allow_missing_columns",
            b"allow_missing_columns",
            "by_name",
            b"by_name",
            "is_all",
            b"is_all",
            "left_input",
            b"left_input",
            "right_input",
            b"right_input",
            "set_op_type",
            b"set_op_type",
        ],
    ) -> None: ...
    @typing.overload
    def WhichOneof(
        self,
        oneof_group: typing_extensions.Literal["_allow_missing_columns", b"_allow_missing_columns"],
    ) -> typing_extensions.Literal["allow_missing_columns"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_by_name", b"_by_name"]
    ) -> typing_extensions.Literal["by_name"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_is_all", b"_is_all"]
    ) -> typing_extensions.Literal["is_all"] | None: ...

global___SetOperation = SetOperation
| |
class Limit(google.protobuf.message.Message):
    """Relation of type [[Limit]] that is used to `limit` rows from the input relation."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    LIMIT_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Limit."""
    limit: builtins.int
    """(Required) The limit: the maximum number of rows to return."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        limit: builtins.int = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["input", b"input", "limit", b"limit"]
    ) -> None: ...

global___Limit = Limit
| |
class Offset(google.protobuf.message.Message):
    """Relation of type [[Offset]] that is used to read rows starting from the `offset` on
    the input relation.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    OFFSET_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for an Offset."""
    offset: builtins.int
    """(Required) The offset. (The generated comment previously said "the limit" —
    a copy-paste from [[Limit]].)
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        offset: builtins.int = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["input", b"input", "offset", b"offset"]
    ) -> None: ...

global___Offset = Offset
| |
class Tail(google.protobuf.message.Message):
    """Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    LIMIT_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Tail."""
    limit: builtins.int
    """(Required) The limit: the number of rows to fetch from the end of the input relation."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        limit: builtins.int = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["input", b"input", "limit", b"limit"]
    ) -> None: ...

global___Tail = Tail
| |
class Aggregate(google.protobuf.message.Message):
    """Relation of type [[Aggregate]]."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class _GroupType:
        ValueType = typing.NewType("ValueType", builtins.int)
        V: typing_extensions.TypeAlias = ValueType

    class _GroupTypeEnumTypeWrapper(
        google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Aggregate._GroupType.ValueType],
        builtins.type,
    ):  # noqa: F821
        DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
        GROUP_TYPE_UNSPECIFIED: Aggregate._GroupType.ValueType  # 0
        GROUP_TYPE_GROUPBY: Aggregate._GroupType.ValueType  # 1
        GROUP_TYPE_ROLLUP: Aggregate._GroupType.ValueType  # 2
        GROUP_TYPE_CUBE: Aggregate._GroupType.ValueType  # 3
        GROUP_TYPE_PIVOT: Aggregate._GroupType.ValueType  # 4
        GROUP_TYPE_GROUPING_SETS: Aggregate._GroupType.ValueType  # 5

    class GroupType(_GroupType, metaclass=_GroupTypeEnumTypeWrapper): ...
    GROUP_TYPE_UNSPECIFIED: Aggregate.GroupType.ValueType  # 0
    GROUP_TYPE_GROUPBY: Aggregate.GroupType.ValueType  # 1
    GROUP_TYPE_ROLLUP: Aggregate.GroupType.ValueType  # 2
    GROUP_TYPE_CUBE: Aggregate.GroupType.ValueType  # 3
    GROUP_TYPE_PIVOT: Aggregate.GroupType.ValueType  # 4
    GROUP_TYPE_GROUPING_SETS: Aggregate.GroupType.ValueType  # 5

    class Pivot(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        COL_FIELD_NUMBER: builtins.int
        VALUES_FIELD_NUMBER: builtins.int
        @property
        def col(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
            """(Required) The column to pivot"""
        @property
        def values(
            self,
        ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
            pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
        ]:
            """(Optional) List of values that will be translated to columns in the output DataFrame.

            Note that if it is empty, the server side will immediately trigger a job to collect
            the distinct values of the column.
            """
        def __init__(
            self,
            *,
            col: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
            values: collections.abc.Iterable[
                pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
            ]
            | None = ...,
        ) -> None: ...
        def HasField(
            self, field_name: typing_extensions.Literal["col", b"col"]
        ) -> builtins.bool: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["col", b"col", "values", b"values"]
        ) -> None: ...

    class GroupingSets(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        GROUPING_SET_FIELD_NUMBER: builtins.int
        @property
        def grouping_set(
            self,
        ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]:
            """(Required) Individual grouping set"""
        def __init__(
            self,
            *,
            grouping_set: collections.abc.Iterable[
                pyspark.sql.connect.proto.expressions_pb2.Expression
            ]
            | None = ...,
        ) -> None: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["grouping_set", b"grouping_set"]
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    GROUP_TYPE_FIELD_NUMBER: builtins.int
    GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    AGGREGATE_EXPRESSIONS_FIELD_NUMBER: builtins.int
    PIVOT_FIELD_NUMBER: builtins.int
    GROUPING_SETS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a RelationalGroupedDataset."""
    group_type: global___Aggregate.GroupType.ValueType
    """(Required) How the RelationalGroupedDataset was built."""
    @property
    def grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) Expressions for grouping keys"""
    @property
    def aggregate_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) List of values that will be translated to columns in the output DataFrame."""
    @property
    def pivot(self) -> global___Aggregate.Pivot:
        """(Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation."""
    @property
    def grouping_sets(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        global___Aggregate.GroupingSets
    ]:
        """(Optional) The grouping sets.

        NOTE(review): the generated comment here duplicated the one on
        `aggregate_expressions`; presumably this field is only consulted when
        `group_type` is GROUP_TYPE_GROUPING_SETS — confirm against the .proto.
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        group_type: global___Aggregate.GroupType.ValueType = ...,
        grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        aggregate_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        pivot: global___Aggregate.Pivot | None = ...,
        grouping_sets: collections.abc.Iterable[global___Aggregate.GroupingSets] | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input", "pivot", b"pivot"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "aggregate_expressions",
            b"aggregate_expressions",
            "group_type",
            b"group_type",
            "grouping_expressions",
            b"grouping_expressions",
            "grouping_sets",
            b"grouping_sets",
            "input",
            b"input",
            "pivot",
            b"pivot",
        ],
    ) -> None: ...

global___Aggregate = Aggregate
| |
class Sort(google.protobuf.message.Message):
    """Relation of type [[Sort]]."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    ORDER_FIELD_NUMBER: builtins.int
    IS_GLOBAL_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Sort."""
    @property
    def order(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression.SortOrder
    ]:
        """(Required) The ordering expressions"""
    is_global: builtins.bool
    """(Optional) Whether this is a global sort."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        order: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression.SortOrder
        ]
        | None = ...,
        is_global: builtins.bool | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_is_global", b"_is_global", "input", b"input", "is_global", b"is_global"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_is_global",
            b"_is_global",
            "input",
            b"input",
            "is_global",
            b"is_global",
            "order",
            b"order",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_is_global", b"_is_global"]
    ) -> typing_extensions.Literal["is_global"] | None: ...

global___Sort = Sort
| |
class Drop(google.protobuf.message.Message):
    """Drop specified columns."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLUMNS_FIELD_NUMBER: builtins.int
    COLUMN_NAMES_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def columns(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) columns to drop."""
    @property
    def column_names(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) names of columns to drop."""
    # NOTE(review): both `columns` and `column_names` are marked (Optional);
    # presumably callers populate at most one of them — the message itself does
    # not enforce this. Confirm against the server-side planner.
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        columns: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
        column_names: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "column_names", b"column_names", "columns", b"columns", "input", b"input"
        ],
    ) -> None: ...

global___Drop = Drop
| |
class Deduplicate(google.protobuf.message.Message):
    """Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only
    the subset of columns or all the columns.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLUMN_NAMES_FIELD_NUMBER: builtins.int
    ALL_COLUMNS_AS_KEYS_FIELD_NUMBER: builtins.int
    WITHIN_WATERMARK_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Deduplicate."""
    @property
    def column_names(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Deduplicate based on a list of column names.

        This field cannot be used together with `all_columns_as_keys`.
        """
    all_columns_as_keys: builtins.bool
    """(Optional) Deduplicate based on all the columns of the input relation.

    This field cannot be used together with `column_names`.
    """
    within_watermark: builtins.bool
    """(Optional) Deduplicate within the time range of watermark."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        column_names: collections.abc.Iterable[builtins.str] | None = ...,
        all_columns_as_keys: builtins.bool | None = ...,
        within_watermark: builtins.bool | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_all_columns_as_keys",
            b"_all_columns_as_keys",
            "_within_watermark",
            b"_within_watermark",
            "all_columns_as_keys",
            b"all_columns_as_keys",
            "input",
            b"input",
            "within_watermark",
            b"within_watermark",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_all_columns_as_keys",
            b"_all_columns_as_keys",
            "_within_watermark",
            b"_within_watermark",
            "all_columns_as_keys",
            b"all_columns_as_keys",
            "column_names",
            b"column_names",
            "input",
            b"input",
            "within_watermark",
            b"within_watermark",
        ],
    ) -> None: ...
    @typing.overload
    def WhichOneof(
        self,
        oneof_group: typing_extensions.Literal["_all_columns_as_keys", b"_all_columns_as_keys"],
    ) -> typing_extensions.Literal["all_columns_as_keys"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_within_watermark", b"_within_watermark"]
    ) -> typing_extensions.Literal["within_watermark"] | None: ...

global___Deduplicate = Deduplicate
| |
class LocalRelation(google.protobuf.message.Message):
    """A relation that does not need to be qualified by name."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    DATA_FIELD_NUMBER: builtins.int
    SCHEMA_FIELD_NUMBER: builtins.int
    data: builtins.bytes
    """(Optional) Local collection data serialized into Arrow IPC streaming format which contains
    the schema of the data.
    """
    schema: builtins.str
    """(Optional) The schema of local data.
    It should be either a DDL-formatted type string or a JSON string.

    The server side will update the column names and data types according to this schema.
    If the 'data' is not provided, then this schema will be required.
    """
    def __init__(
        self,
        *,
        data: builtins.bytes | None = ...,
        schema: builtins.str | None = ...,
    ) -> None: ...
    # "_data" / "_schema" below are presumably the synthetic oneofs generated
    # for the proto3 `optional` fields — TODO confirm against the .proto.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_data", b"_data", "_schema", b"_schema", "data", b"data", "schema", b"schema"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_data", b"_data", "_schema", b"_schema", "data", b"data", "schema", b"schema"
        ],
    ) -> None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_data", b"_data"]
    ) -> typing_extensions.Literal["data"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_schema", b"_schema"]
    ) -> typing_extensions.Literal["schema"] | None: ...

global___LocalRelation = LocalRelation
| |
class CachedLocalRelation(google.protobuf.message.Message):
    """A local relation that has been cached already."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    HASH_FIELD_NUMBER: builtins.int
    # The field name `hash` mirrors the proto field; it shadows builtins.hash
    # only within this class body, which is harmless in a message stub.
    hash: builtins.str
    """(Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation."""
    def __init__(
        self,
        *,
        hash: builtins.str = ...,
    ) -> None: ...
    def ClearField(self, field_name: typing_extensions.Literal["hash", b"hash"]) -> None: ...

global___CachedLocalRelation = CachedLocalRelation
| |
class CachedRemoteRelation(google.protobuf.message.Message):
    """Represents a remote relation that has been cached on server."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    RELATION_ID_FIELD_NUMBER: builtins.int
    relation_id: builtins.str
    """(Required) ID of the remote relation (assigned by the service)."""
    def __init__(
        self,
        *,
        relation_id: builtins.str = ...,
    ) -> None: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["relation_id", b"relation_id"]
    ) -> None: ...

global___CachedRemoteRelation = CachedRemoteRelation
| |
class Sample(google.protobuf.message.Message):
    """Relation of type [[Sample]] that samples a fraction of the dataset."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    LOWER_BOUND_FIELD_NUMBER: builtins.int
    UPPER_BOUND_FIELD_NUMBER: builtins.int
    WITH_REPLACEMENT_FIELD_NUMBER: builtins.int
    SEED_FIELD_NUMBER: builtins.int
    DETERMINISTIC_ORDER_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a Sample."""
    lower_bound: builtins.float
    """(Required) lower bound."""
    upper_bound: builtins.float
    """(Required) upper bound."""
    with_replacement: builtins.bool
    """(Optional) Whether to sample with replacement."""
    seed: builtins.int
    """(Required) The random seed.
    This field is required to avoid generating mutable dataframes (see SPARK-48184 for details);
    however, it is still kept 'optional' here for backward compatibility.
    """
    deterministic_order: builtins.bool
    """(Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it.
    This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the
    provided weights. Otherwise, it is false.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        lower_bound: builtins.float = ...,
        upper_bound: builtins.float = ...,
        with_replacement: builtins.bool | None = ...,
        seed: builtins.int | None = ...,
        deterministic_order: builtins.bool = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_seed",
            b"_seed",
            "_with_replacement",
            b"_with_replacement",
            "input",
            b"input",
            "seed",
            b"seed",
            "with_replacement",
            b"with_replacement",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_seed",
            b"_seed",
            "_with_replacement",
            b"_with_replacement",
            "deterministic_order",
            b"deterministic_order",
            "input",
            b"input",
            "lower_bound",
            b"lower_bound",
            "seed",
            b"seed",
            "upper_bound",
            b"upper_bound",
            "with_replacement",
            b"with_replacement",
        ],
    ) -> None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_seed", b"_seed"]
    ) -> typing_extensions.Literal["seed"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_with_replacement", b"_with_replacement"]
    ) -> typing_extensions.Literal["with_replacement"] | None: ...

global___Sample = Sample
| |
class Range(google.protobuf.message.Message):
    """Relation of type [[Range]] that generates a sequence of integers."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    START_FIELD_NUMBER: builtins.int
    END_FIELD_NUMBER: builtins.int
    STEP_FIELD_NUMBER: builtins.int
    NUM_PARTITIONS_FIELD_NUMBER: builtins.int
    start: builtins.int
    """(Optional) Default value = 0"""
    end: builtins.int
    """(Required)"""
    step: builtins.int
    """(Required)"""
    num_partitions: builtins.int
    """(Optional) Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if
    it is set, or 2) spark default parallelism.
    """
    def __init__(
        self,
        *,
        start: builtins.int | None = ...,
        end: builtins.int = ...,
        step: builtins.int = ...,
        num_partitions: builtins.int | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_num_partitions",
            b"_num_partitions",
            "_start",
            b"_start",
            "num_partitions",
            b"num_partitions",
            "start",
            b"start",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_num_partitions",
            b"_num_partitions",
            "_start",
            b"_start",
            "end",
            b"end",
            "num_partitions",
            b"num_partitions",
            "start",
            b"start",
            "step",
            b"step",
        ],
    ) -> None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_num_partitions", b"_num_partitions"]
    ) -> typing_extensions.Literal["num_partitions"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_start", b"_start"]
    ) -> typing_extensions.Literal["start"] | None: ...

global___Range = Range
| |
class SubqueryAlias(google.protobuf.message.Message):
    """Relation of type [[SubqueryAlias]] that assigns an alias (with optional qualifier)
    to its input relation.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    ALIAS_FIELD_NUMBER: builtins.int
    QUALIFIER_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation of SubqueryAlias."""
    alias: builtins.str
    """(Required) The alias."""
    @property
    def qualifier(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Qualifier of the alias."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        alias: builtins.str = ...,
        qualifier: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "alias", b"alias", "input", b"input", "qualifier", b"qualifier"
        ],
    ) -> None: ...

global___SubqueryAlias = SubqueryAlias
| |
class Repartition(google.protobuf.message.Message):
    """Relation repartition."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    NUM_PARTITIONS_FIELD_NUMBER: builtins.int
    SHUFFLE_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation of Repartition."""
    num_partitions: builtins.int
    """(Required) Must be positive."""
    shuffle: builtins.bool
    """(Optional) Default value is false."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        num_partitions: builtins.int = ...,
        shuffle: builtins.bool | None = ...,
    ) -> None: ...
    # "_shuffle" is the oneof group that backs the optional 'shuffle' field,
    # allowing its presence to be checked and cleared explicitly.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_shuffle", b"_shuffle", "input", b"input", "shuffle", b"shuffle"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_shuffle",
            b"_shuffle",
            "input",
            b"input",
            "num_partitions",
            b"num_partitions",
            "shuffle",
            b"shuffle",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_shuffle", b"_shuffle"]
    ) -> typing_extensions.Literal["shuffle"] | None: ...

global___Repartition = Repartition
| |
class ShowString(google.protobuf.message.Message):
    """Compose the string representing rows for output.
    It will invoke 'Dataset.showString' to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    NUM_ROWS_FIELD_NUMBER: builtins.int
    TRUNCATE_FIELD_NUMBER: builtins.int
    VERTICAL_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    num_rows: builtins.int
    """(Required) Number of rows to show."""
    truncate: builtins.int
    """(Required) If set to more than 0, truncates strings to
    `truncate` characters and all cells will be aligned right.
    """
    vertical: builtins.bool
    """(Required) If set to true, prints output rows vertically (one line per column value)."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        num_rows: builtins.int = ...,
        truncate: builtins.int = ...,
        vertical: builtins.bool = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "input",
            b"input",
            "num_rows",
            b"num_rows",
            "truncate",
            b"truncate",
            "vertical",
            b"vertical",
        ],
    ) -> None: ...

global___ShowString = ShowString
| |
class HtmlString(google.protobuf.message.Message):
    """Compose the string representing rows for output.
    It will invoke 'Dataset.htmlString' to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    NUM_ROWS_FIELD_NUMBER: builtins.int
    TRUNCATE_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    num_rows: builtins.int
    """(Required) Number of rows to show."""
    truncate: builtins.int
    """(Required) If set to more than 0, truncates strings to
    `truncate` characters and all cells will be aligned right.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        num_rows: builtins.int = ...,
        truncate: builtins.int = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "input", b"input", "num_rows", b"num_rows", "truncate", b"truncate"
        ],
    ) -> None: ...

global___HtmlString = HtmlString
| |
class StatSummary(google.protobuf.message.Message):
    """Computes specified statistics for numeric and string columns.
    It will invoke 'Dataset.summary' (same as 'StatFunctions.summary')
    to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    STATISTICS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def statistics(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Statistics to be computed.

        Available statistics are:
         count
         mean
         stddev
         min
         max
         arbitrary approximate percentiles specified as a percentage (e.g. 75%)
         count_distinct
         approx_count_distinct

        If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min',
        'approximate quartiles' (percentiles at 25%, 50%, and 75%), and 'max'.
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        statistics: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["input", b"input", "statistics", b"statistics"]
    ) -> None: ...

global___StatSummary = StatSummary
| |
class StatDescribe(google.protobuf.message.Message):
    """Computes basic statistics for numeric and string columns, including count, mean, stddev, min,
    and max. If no columns are given, this function computes statistics for all numerical or
    string columns.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Columns to compute statistics on."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["cols", b"cols", "input", b"input"]
    ) -> None: ...

global___StatDescribe = StatDescribe
| |
class StatCrosstab(google.protobuf.message.Message):
    """Computes a pair-wise frequency table of the given columns. Also known as a contingency table.
    It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate')
    to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COL1_FIELD_NUMBER: builtins.int
    COL2_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    col1: builtins.str
    """(Required) The name of the first column.

    Distinct items will make the first item of each row.
    """
    col2: builtins.str
    """(Required) The name of the second column.

    Distinct items will make the column names of the DataFrame.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        col1: builtins.str = ...,
        col2: builtins.str = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal["col1", b"col1", "col2", b"col2", "input", b"input"],
    ) -> None: ...

global___StatCrosstab = StatCrosstab
| |
class StatCov(google.protobuf.message.Message):
    """Calculate the sample covariance of two numerical columns of a DataFrame.
    It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COL1_FIELD_NUMBER: builtins.int
    COL2_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    col1: builtins.str
    """(Required) The name of the first column."""
    col2: builtins.str
    """(Required) The name of the second column."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        col1: builtins.str = ...,
        col2: builtins.str = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal["col1", b"col1", "col2", b"col2", "input", b"input"],
    ) -> None: ...

global___StatCov = StatCov
| |
class StatCorr(google.protobuf.message.Message):
    """Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson
    Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as
    'StatFunctions.pearsonCorrelation') to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COL1_FIELD_NUMBER: builtins.int
    COL2_FIELD_NUMBER: builtins.int
    METHOD_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    col1: builtins.str
    """(Required) The name of the first column."""
    col2: builtins.str
    """(Required) The name of the second column."""
    method: builtins.str
    """(Optional) Default value is 'pearson'.

    Currently only supports the Pearson Correlation Coefficient.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        col1: builtins.str = ...,
        col2: builtins.str = ...,
        method: builtins.str | None = ...,
    ) -> None: ...
    # "_method" is the oneof group that backs the optional 'method' field.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_method", b"_method", "input", b"input", "method", b"method"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_method",
            b"_method",
            "col1",
            b"col1",
            "col2",
            b"col2",
            "input",
            b"input",
            "method",
            b"method",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_method", b"_method"]
    ) -> typing_extensions.Literal["method"] | None: ...

global___StatCorr = StatCorr
| |
class StatApproxQuantile(google.protobuf.message.Message):
    """Calculates the approximate quantiles of numerical columns of a DataFrame.
    It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile')
    to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    PROBABILITIES_FIELD_NUMBER: builtins.int
    RELATIVE_ERROR_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Required) The names of the numerical columns."""
    @property
    def probabilities(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]:
        """(Required) A list of quantile probabilities.

        Each number must belong to [0, 1].
        For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
        """
    relative_error: builtins.float
    """(Required) The relative target precision to achieve (greater than or equal to 0).

    If set to zero, the exact quantiles are computed, which could be very expensive.
    Note that values greater than 1 are accepted but give the same result as 1.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
        probabilities: collections.abc.Iterable[builtins.float] | None = ...,
        relative_error: builtins.float = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "cols",
            b"cols",
            "input",
            b"input",
            "probabilities",
            b"probabilities",
            "relative_error",
            b"relative_error",
        ],
    ) -> None: ...

global___StatApproxQuantile = StatApproxQuantile
| |
class StatFreqItems(google.protobuf.message.Message):
    """Finding frequent items for columns, possibly with false positives.
    It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems')
    to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    SUPPORT_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Required) The names of the columns to search frequent items in."""
    support: builtins.float
    """(Optional) The minimum frequency for an item to be considered `frequent`.
    Should be greater than 1e-4.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
        support: builtins.float | None = ...,
    ) -> None: ...
    # "_support" is the oneof group that backs the optional 'support' field.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_support", b"_support", "input", b"input", "support", b"support"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_support", b"_support", "cols", b"cols", "input", b"input", "support", b"support"
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_support", b"_support"]
    ) -> typing_extensions.Literal["support"] | None: ...

global___StatFreqItems = StatFreqItems
| |
class StatSampleBy(google.protobuf.message.Message):
    """Returns a stratified sample without replacement based on the fraction
    given on each stratum.
    It will invoke 'Dataset.stat.sampleBy' to compute the results.

    NOTE(review): the original comment referenced 'Dataset.stat.freqItems'
    (same as 'StatFunctions.freqItems'), which looks like a copy-paste from
    StatFreqItems — confirm against the upstream .proto source.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class Fraction(google.protobuf.message.Message):
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        STRATUM_FIELD_NUMBER: builtins.int
        FRACTION_FIELD_NUMBER: builtins.int
        @property
        def stratum(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal:
            """(Required) The stratum."""
        fraction: builtins.float
        """(Required) The fraction value. Must be in [0, 1]."""
        def __init__(
            self,
            *,
            stratum: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ...,
            fraction: builtins.float = ...,
        ) -> None: ...
        def HasField(
            self, field_name: typing_extensions.Literal["stratum", b"stratum"]
        ) -> builtins.bool: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal["fraction", b"fraction", "stratum", b"stratum"],
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    COL_FIELD_NUMBER: builtins.int
    FRACTIONS_FIELD_NUMBER: builtins.int
    SEED_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def col(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Required) The column that defines strata."""
    @property
    def fractions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        global___StatSampleBy.Fraction
    ]:
        """(Required) Sampling fraction for each stratum.

        If a stratum is not specified, we treat its fraction as zero.
        """
    seed: builtins.int
    """(Required) The random seed.
    This field is required to avoid generating mutable dataframes (see SPARK-48184 for details);
    however, it is still kept 'optional' here for backward compatibility.
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        col: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        fractions: collections.abc.Iterable[global___StatSampleBy.Fraction] | None = ...,
        seed: builtins.int | None = ...,
    ) -> None: ...
    # "_seed" is the oneof group that backs the optional 'seed' field.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_seed", b"_seed", "col", b"col", "input", b"input", "seed", b"seed"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_seed",
            b"_seed",
            "col",
            b"col",
            "fractions",
            b"fractions",
            "input",
            b"input",
            "seed",
            b"seed",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_seed", b"_seed"]
    ) -> typing_extensions.Literal["seed"] | None: ...

global___StatSampleBy = StatSampleBy
| |
class NAFill(google.protobuf.message.Message):
    """Replaces null values.
    It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results.
    Following 3 parameter combinations are supported:
     1, 'values' only contains 1 item, 'cols' is empty:
       replaces null values in all type-compatible columns.
     2, 'values' only contains 1 item, 'cols' is not empty:
       replaces null values in specified columns.
     3, 'values' contains more than 1 item, then 'cols' is required to have the same length:
       replaces each specified column with corresponding value.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    VALUES_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Optional list of column names to consider."""
    @property
    def values(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
    ]:
        """(Required) Values to replace null values with.

        Should contain at least 1 item.
        Only 4 data types are supported now: bool, long, double, string
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
        values: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
        ]
        | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "cols", b"cols", "input", b"input", "values", b"values"
        ],
    ) -> None: ...

global___NAFill = NAFill
| |
class NADrop(google.protobuf.message.Message):
    """Drop rows containing null values.
    It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    MIN_NON_NULLS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) Optional list of column names to consider.

        When it is empty, all the columns in the input relation will be considered.
        """
    min_non_nulls: builtins.int
    """(Optional) The minimum number of non-null and non-NaN values required to keep.

    When not set, it is equivalent to the number of considered columns, which means
    a row will be kept only if all columns are non-null.

    'how' options ('all', 'any') can be easily converted to this field:
      - 'all' -> set 'min_non_nulls' to 1;
      - 'any' -> keep 'min_non_nulls' unset;
    """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
        min_non_nulls: builtins.int | None = ...,
    ) -> None: ...
    # "_min_non_nulls" is the oneof group that backs the optional 'min_non_nulls' field.
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_min_non_nulls",
            b"_min_non_nulls",
            "input",
            b"input",
            "min_non_nulls",
            b"min_non_nulls",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_min_non_nulls",
            b"_min_non_nulls",
            "cols",
            b"cols",
            "input",
            b"input",
            "min_non_nulls",
            b"min_non_nulls",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_min_non_nulls", b"_min_non_nulls"]
    ) -> typing_extensions.Literal["min_non_nulls"] | None: ...

global___NADrop = NADrop
| |
class NAReplace(google.protobuf.message.Message):
    """Replaces old values with the corresponding values.
    It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace')
    to compute the results.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class Replacement(google.protobuf.message.Message):
        # A single (old_value -> new_value) substitution pair.
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        OLD_VALUE_FIELD_NUMBER: builtins.int
        NEW_VALUE_FIELD_NUMBER: builtins.int
        @property
        def old_value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal:
            """(Required) The old value.

            Only 4 data types are supported now: null, bool, double, string.
            """
        @property
        def new_value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal:
            """(Required) The new value.

            Should be of the same data type with the old value.
            """
        def __init__(
            self,
            *,
            old_value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ...,
            new_value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ...,
        ) -> None: ...
        def HasField(
            self,
            field_name: typing_extensions.Literal[
                "new_value", b"new_value", "old_value", b"old_value"
            ],
        ) -> builtins.bool: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal[
                "new_value", b"new_value", "old_value", b"old_value"
            ],
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    COLS_FIELD_NUMBER: builtins.int
    REPLACEMENTS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def cols(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Optional) List of column names to consider.

        When it is empty, all the type-compatible columns in the input relation will be considered.
        """
    @property
    def replacements(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        global___NAReplace.Replacement
    ]:
        """(Optional) The value replacement mapping."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        cols: collections.abc.Iterable[builtins.str] | None = ...,
        replacements: collections.abc.Iterable[global___NAReplace.Replacement] | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "cols", b"cols", "input", b"input", "replacements", b"replacements"
        ],
    ) -> None: ...

global___NAReplace = NAReplace
| |
class ToDF(google.protobuf.message.Message):
    """Rename columns on the input relation by the same length of names."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    COLUMN_NAMES_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation of RenameColumnsBySameLengthNames."""
    @property
    def column_names(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """(Required)

        The number of columns of the input relation must be equal to the length
        of this field. If this is not true, an exception will be returned.
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        column_names: collections.abc.Iterable[builtins.str] | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal["column_names", b"column_names", "input", b"input"],
    ) -> None: ...

global___ToDF = ToDF
| |
class WithColumnsRenamed(google.protobuf.message.Message):
    """Rename columns on the input relation by a map with name to name mapping."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class RenameColumnsMapEntry(google.protobuf.message.Message):
        # Generated map-entry message for the 'rename_columns_map' field.
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        KEY_FIELD_NUMBER: builtins.int
        VALUE_FIELD_NUMBER: builtins.int
        key: builtins.str
        value: builtins.str
        def __init__(
            self,
            *,
            key: builtins.str = ...,
            value: builtins.str = ...,
        ) -> None: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
        ) -> None: ...

    class Rename(google.protobuf.message.Message):
        # A single (col_name -> new_col_name) rename entry.
        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        COL_NAME_FIELD_NUMBER: builtins.int
        NEW_COL_NAME_FIELD_NUMBER: builtins.int
        col_name: builtins.str
        """(Required) The existing column name."""
        new_col_name: builtins.str
        """(Required) The new column name."""
        def __init__(
            self,
            *,
            col_name: builtins.str = ...,
            new_col_name: builtins.str = ...,
        ) -> None: ...
        def ClearField(
            self,
            field_name: typing_extensions.Literal[
                "col_name", b"col_name", "new_col_name", b"new_col_name"
            ],
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    RENAME_COLUMNS_MAP_FIELD_NUMBER: builtins.int
    RENAMES_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def rename_columns_map(
        self,
    ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
        """(Optional)

        Renaming column names of input relation from A to B where A is the map key
        and B is the map value. This is a no-op if the schema doesn't contain any A.
        It does not require all input relation column names to be present as keys.
        Duplicated values of B are not allowed.
        """
    @property
    def renames(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        global___WithColumnsRenamed.Rename
    ]: ...
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        rename_columns_map: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
        renames: collections.abc.Iterable[global___WithColumnsRenamed.Rename] | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "input", b"input", "rename_columns_map", b"rename_columns_map", "renames", b"renames"
        ],
    ) -> None: ...

global___WithColumnsRenamed = WithColumnsRenamed
| |
class WithColumns(google.protobuf.message.Message):
    """Adding columns or replacing the existing columns that have the same names."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    ALIASES_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def aliases(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression.Alias
    ]:
        """(Required)

        Given a column name, apply the corresponding expression on the column. If column
        name exists in the input relation, then replace the column. If the column name
        does not exist in the input relation, then adds it as a new column.

        Only one name part is expected from each Expression.Alias.

        An exception is thrown when duplicated names are present in the mapping.
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        aliases: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression.Alias
        ]
        | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["aliases", b"aliases", "input", b"input"]
    ) -> None: ...

global___WithColumns = WithColumns
| |
class WithWatermark(google.protobuf.message.Message):
    """Define an event-time watermark over the input relation."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    EVENT_TIME_FIELD_NUMBER: builtins.int
    DELAY_THRESHOLD_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation"""
    event_time: builtins.str
    """(Required) Name of the column containing event time."""
    delay_threshold: builtins.str
    """(Required)"""
    # NOTE(review): 'delay_threshold' presumably holds an interval string
    # (e.g. "10 minutes") as in Dataset.withWatermark — confirm against the proto.
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        event_time: builtins.str = ...,
        delay_threshold: builtins.str = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "delay_threshold", b"delay_threshold", "event_time", b"event_time", "input", b"input"
        ],
    ) -> None: ...

global___WithWatermark = WithWatermark
| |
class Hint(google.protobuf.message.Message):
    """Specify a hint over a relation. Hint should have a name and optional parameters."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    NAME_FIELD_NUMBER: builtins.int
    PARAMETERS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    name: builtins.str
    """(Required) Hint name.

    Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL.

    Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE.
    """
    @property
    def parameters(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Hint parameters."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        name: builtins.str = ...,
        parameters: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
    ) -> None: ...
    # Presence ('HasField') is only defined for the message-typed 'input' field.
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "input", b"input", "name", b"name", "parameters", b"parameters"
        ],
    ) -> None: ...

global___Hint = Hint
| |
class Unpivot(google.protobuf.message.Message):
    """Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    class Values(google.protobuf.message.Message):
        """Wrapper message holding the (repeated) value-column expressions to unpivot."""

        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        VALUES_FIELD_NUMBER: builtins.int
        @property
        def values(
            self,
        ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]: ...
        def __init__(
            self,
            *,
            values: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
            | None = ...,
        ) -> None: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["values", b"values"]
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    IDS_FIELD_NUMBER: builtins.int
    VALUES_FIELD_NUMBER: builtins.int
    VARIABLE_COLUMN_NAME_FIELD_NUMBER: builtins.int
    VALUE_COLUMN_NAME_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def ids(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) Id columns."""
    @property
    def values(self) -> global___Unpivot.Values:
        """(Optional) Value columns to unpivot."""
    variable_column_name: builtins.str
    """(Required) Name of the variable column."""
    value_column_name: builtins.str
    """(Required) Name of the value column."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        ids: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
        values: global___Unpivot.Values | None = ...,
        variable_column_name: builtins.str = ...,
        value_column_name: builtins.str = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_values", b"_values", "input", b"input", "values", b"values"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_values",
            b"_values",
            "ids",
            b"ids",
            "input",
            b"input",
            "value_column_name",
            b"value_column_name",
            "values",
            b"values",
            "variable_column_name",
            b"variable_column_name",
        ],
    ) -> None: ...
    # "_values" is the synthetic oneof backing the optional `values` field.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_values", b"_values"]
    ) -> typing_extensions.Literal["values"] | None: ...

global___Unpivot = Unpivot
| |
class ToSchema(google.protobuf.message.Message):
    """Apply the user provided schema to the input relation."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    SCHEMA_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def schema(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
        """(Required) The user provided schema.

        The Server side will update the dataframe with this schema.
        """
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        schema: pyspark.sql.connect.proto.types_pb2.DataType | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input", "schema", b"schema"]
    ) -> builtins.bool: ...
    def ClearField(
        self, field_name: typing_extensions.Literal["input", b"input", "schema", b"schema"]
    ) -> None: ...

global___ToSchema = ToSchema
| |
class RepartitionByExpression(google.protobuf.message.Message):
    """Repartition the input relation using the given partitioning expressions,
    with an optional target number of partitions.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    PARTITION_EXPRS_FIELD_NUMBER: builtins.int
    NUM_PARTITIONS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    @property
    def partition_exprs(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) The partitioning expressions."""
    num_partitions: builtins.int
    """(Optional) number of partitions, must be positive."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        partition_exprs: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        num_partitions: builtins.int | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_num_partitions",
            b"_num_partitions",
            "input",
            b"input",
            "num_partitions",
            b"num_partitions",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_num_partitions",
            b"_num_partitions",
            "input",
            b"input",
            "num_partitions",
            b"num_partitions",
            "partition_exprs",
            b"partition_exprs",
        ],
    ) -> None: ...
    # "_num_partitions" is the synthetic oneof backing the optional `num_partitions` field.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_num_partitions", b"_num_partitions"]
    ) -> typing_extensions.Literal["num_partitions"] | None: ...

global___RepartitionByExpression = RepartitionByExpression
| |
class MapPartitions(google.protobuf.message.Message):
    """Input for a mapPartitions-equivalent API: mapInPandas, mapInArrow."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    FUNC_FIELD_NUMBER: builtins.int
    IS_BARRIER_FIELD_NUMBER: builtins.int
    PROFILE_ID_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow."""
    @property
    def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction:
        """(Required) Input user-defined function."""
    is_barrier: builtins.bool
    """(Optional) Whether to use barrier mode execution or not."""
    profile_id: builtins.int
    """(Optional) ResourceProfile id used for the stage level scheduling."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction
        | None = ...,
        is_barrier: builtins.bool | None = ...,
        profile_id: builtins.int | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_is_barrier",
            b"_is_barrier",
            "_profile_id",
            b"_profile_id",
            "func",
            b"func",
            "input",
            b"input",
            "is_barrier",
            b"is_barrier",
            "profile_id",
            b"profile_id",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_is_barrier",
            b"_is_barrier",
            "_profile_id",
            b"_profile_id",
            "func",
            b"func",
            "input",
            b"input",
            "is_barrier",
            b"is_barrier",
            "profile_id",
            b"profile_id",
        ],
    ) -> None: ...
    # "_is_barrier" / "_profile_id" are the synthetic oneofs backing the
    # optional fields of the same (un-prefixed) names.
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_is_barrier", b"_is_barrier"]
    ) -> typing_extensions.Literal["is_barrier"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_profile_id", b"_profile_id"]
    ) -> typing_extensions.Literal["profile_id"] | None: ...

global___MapPartitions = MapPartitions
| |
class GroupMap(google.protobuf.message.Message):
    """Input for the Group Map APIs (apply, applyInPandas); the trailing fields
    also carry the extra state needed by (Flat)MapGroupsWithState.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    FUNC_FIELD_NUMBER: builtins.int
    SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    INITIAL_INPUT_FIELD_NUMBER: builtins.int
    INITIAL_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    IS_MAP_GROUPS_WITH_STATE_FIELD_NUMBER: builtins.int
    OUTPUT_MODE_FIELD_NUMBER: builtins.int
    TIMEOUT_CONF_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for Group Map API: apply, applyInPandas."""
    @property
    def grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) Expressions for grouping keys."""
    @property
    def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction:
        """(Required) Input user-defined function."""
    @property
    def sorting_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Expressions for sorting. Only used by Scala Sorted Group Map API."""
    @property
    def initial_input(self) -> global___Relation:
        """Below fields are only used by (Flat)MapGroupsWithState
        (Optional) Input relation for initial State.
        """
    @property
    def initial_grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Expressions for grouping keys of the initial state input relation."""
    is_map_groups_with_state: builtins.bool
    """(Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState."""
    output_mode: builtins.str
    """(Optional) The output mode of the function."""
    timeout_conf: builtins.str
    """(Optional) Timeout configuration for groups that do not receive data for a while."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction
        | None = ...,
        sorting_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        initial_input: global___Relation | None = ...,
        initial_grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        is_map_groups_with_state: builtins.bool | None = ...,
        output_mode: builtins.str | None = ...,
        timeout_conf: builtins.str | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_is_map_groups_with_state",
            b"_is_map_groups_with_state",
            "_output_mode",
            b"_output_mode",
            "_timeout_conf",
            b"_timeout_conf",
            "func",
            b"func",
            "initial_input",
            b"initial_input",
            "input",
            b"input",
            "is_map_groups_with_state",
            b"is_map_groups_with_state",
            "output_mode",
            b"output_mode",
            "timeout_conf",
            b"timeout_conf",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_is_map_groups_with_state",
            b"_is_map_groups_with_state",
            "_output_mode",
            b"_output_mode",
            "_timeout_conf",
            b"_timeout_conf",
            "func",
            b"func",
            "grouping_expressions",
            b"grouping_expressions",
            "initial_grouping_expressions",
            b"initial_grouping_expressions",
            "initial_input",
            b"initial_input",
            "input",
            b"input",
            "is_map_groups_with_state",
            b"is_map_groups_with_state",
            "output_mode",
            b"output_mode",
            "sorting_expressions",
            b"sorting_expressions",
            "timeout_conf",
            b"timeout_conf",
        ],
    ) -> None: ...
    # The "_<name>" oneof groups below are the synthetic oneofs backing the
    # corresponding optional scalar fields.
    @typing.overload
    def WhichOneof(
        self,
        oneof_group: typing_extensions.Literal[
            "_is_map_groups_with_state", b"_is_map_groups_with_state"
        ],
    ) -> typing_extensions.Literal["is_map_groups_with_state"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_output_mode", b"_output_mode"]
    ) -> typing_extensions.Literal["output_mode"] | None: ...
    @typing.overload
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_timeout_conf", b"_timeout_conf"]
    ) -> typing_extensions.Literal["timeout_conf"] | None: ...

global___GroupMap = GroupMap
| |
class CoGroupMap(google.protobuf.message.Message):
    """Input for the CoGroup Map API (applyInPandas over two grouped relations)."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    INPUT_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    OTHER_FIELD_NUMBER: builtins.int
    OTHER_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    FUNC_FIELD_NUMBER: builtins.int
    INPUT_SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    OTHER_SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) One input relation for CoGroup Map API - applyInPandas."""
    @property
    def input_grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """Expressions for grouping keys of the first input relation."""
    @property
    def other(self) -> global___Relation:
        """(Required) The other input relation."""
    @property
    def other_grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """Expressions for grouping keys of the other input relation."""
    @property
    def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction:
        """(Required) Input user-defined function."""
    @property
    def input_sorting_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API."""
    @property
    def other_sorting_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        input_grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        other: global___Relation | None = ...,
        other_grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction
        | None = ...,
        input_sorting_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        other_sorting_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "func", b"func", "input", b"input", "other", b"other"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "func",
            b"func",
            "input",
            b"input",
            "input_grouping_expressions",
            b"input_grouping_expressions",
            "input_sorting_expressions",
            b"input_sorting_expressions",
            "other",
            b"other",
            "other_grouping_expressions",
            b"other_grouping_expressions",
            "other_sorting_expressions",
            b"other_sorting_expressions",
        ],
    ) -> None: ...

global___CoGroupMap = CoGroupMap
| |
class ApplyInPandasWithState(google.protobuf.message.Message):
    """Input for the applyInPandasWithState API."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    INPUT_FIELD_NUMBER: builtins.int
    GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int
    FUNC_FIELD_NUMBER: builtins.int
    OUTPUT_SCHEMA_FIELD_NUMBER: builtins.int
    STATE_SCHEMA_FIELD_NUMBER: builtins.int
    OUTPUT_MODE_FIELD_NUMBER: builtins.int
    TIMEOUT_CONF_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation for applyInPandasWithState."""
    @property
    def grouping_expressions(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) Expressions for grouping keys."""
    @property
    def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction:
        """(Required) Input user-defined function."""
    output_schema: builtins.str
    """(Required) Schema for the output DataFrame."""
    state_schema: builtins.str
    """(Required) Schema for the state."""
    output_mode: builtins.str
    """(Required) The output mode of the function."""
    timeout_conf: builtins.str
    """(Required) Timeout configuration for groups that do not receive data for a while."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        grouping_expressions: collections.abc.Iterable[
            pyspark.sql.connect.proto.expressions_pb2.Expression
        ]
        | None = ...,
        func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction
        | None = ...,
        output_schema: builtins.str = ...,
        state_schema: builtins.str = ...,
        output_mode: builtins.str = ...,
        timeout_conf: builtins.str = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["func", b"func", "input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "func",
            b"func",
            "grouping_expressions",
            b"grouping_expressions",
            "input",
            b"input",
            "output_mode",
            b"output_mode",
            "output_schema",
            b"output_schema",
            "state_schema",
            b"state_schema",
            "timeout_conf",
            b"timeout_conf",
        ],
    ) -> None: ...

global___ApplyInPandasWithState = ApplyInPandasWithState
| |
class CommonInlineUserDefinedTableFunction(google.protobuf.message.Message):
    """An inline user-defined table function; the concrete implementation is
    carried in the `function` oneof (here: `python_udtf`).
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    FUNCTION_NAME_FIELD_NUMBER: builtins.int
    DETERMINISTIC_FIELD_NUMBER: builtins.int
    ARGUMENTS_FIELD_NUMBER: builtins.int
    PYTHON_UDTF_FIELD_NUMBER: builtins.int
    function_name: builtins.str
    """(Required) Name of the user-defined table function."""
    deterministic: builtins.bool
    """(Optional) Whether the user-defined table function is deterministic."""
    @property
    def arguments(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Optional) Function input arguments. Empty arguments are allowed."""
    @property
    def python_udtf(self) -> global___PythonUDTF: ...
    def __init__(
        self,
        *,
        function_name: builtins.str = ...,
        deterministic: builtins.bool = ...,
        arguments: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
        python_udtf: global___PythonUDTF | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "function", b"function", "python_udtf", b"python_udtf"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "arguments",
            b"arguments",
            "deterministic",
            b"deterministic",
            "function",
            b"function",
            "function_name",
            b"function_name",
            "python_udtf",
            b"python_udtf",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["function", b"function"]
    ) -> typing_extensions.Literal["python_udtf"] | None: ...

global___CommonInlineUserDefinedTableFunction = CommonInlineUserDefinedTableFunction
| |
class PythonUDTF(google.protobuf.message.Message):
    """Wire representation of a Python user-defined table function (UDTF)."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    RETURN_TYPE_FIELD_NUMBER: builtins.int
    EVAL_TYPE_FIELD_NUMBER: builtins.int
    COMMAND_FIELD_NUMBER: builtins.int
    PYTHON_VER_FIELD_NUMBER: builtins.int
    @property
    def return_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
        """(Optional) Return type of the Python UDTF."""
    eval_type: builtins.int
    """(Required) EvalType of the Python UDTF."""
    command: builtins.bytes
    """(Required) The encoded commands of the Python UDTF."""
    python_ver: builtins.str
    """(Required) Python version being used in the client."""
    def __init__(
        self,
        *,
        return_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ...,
        eval_type: builtins.int = ...,
        command: builtins.bytes = ...,
        python_ver: builtins.str = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_return_type", b"_return_type", "return_type", b"return_type"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_return_type",
            b"_return_type",
            "command",
            b"command",
            "eval_type",
            b"eval_type",
            "python_ver",
            b"python_ver",
            "return_type",
            b"return_type",
        ],
    ) -> None: ...
    # "_return_type" is the synthetic oneof backing the optional `return_type` field.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_return_type", b"_return_type"]
    ) -> typing_extensions.Literal["return_type"] | None: ...

global___PythonUDTF = PythonUDTF
| |
class CommonInlineUserDefinedDataSource(google.protobuf.message.Message):
    """An inline user-defined data source; the implementation is carried in the
    `data_source` oneof (here: `python_data_source`).
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    NAME_FIELD_NUMBER: builtins.int
    PYTHON_DATA_SOURCE_FIELD_NUMBER: builtins.int
    name: builtins.str
    """(Required) Name of the data source."""
    @property
    def python_data_source(self) -> global___PythonDataSource: ...
    def __init__(
        self,
        *,
        name: builtins.str = ...,
        python_data_source: global___PythonDataSource | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "data_source", b"data_source", "python_data_source", b"python_data_source"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "data_source",
            b"data_source",
            "name",
            b"name",
            "python_data_source",
            b"python_data_source",
        ],
    ) -> None: ...
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["data_source", b"data_source"]
    ) -> typing_extensions.Literal["python_data_source"] | None: ...

global___CommonInlineUserDefinedDataSource = CommonInlineUserDefinedDataSource
| |
class PythonDataSource(google.protobuf.message.Message):
    """Wire representation of a Python user-defined data source."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    COMMAND_FIELD_NUMBER: builtins.int
    PYTHON_VER_FIELD_NUMBER: builtins.int
    command: builtins.bytes
    """(Required) The encoded commands of the Python data source."""
    python_ver: builtins.str
    """(Required) Python version being used in the client."""
    def __init__(
        self,
        *,
        command: builtins.bytes = ...,
        python_ver: builtins.str = ...,
    ) -> None: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal["command", b"command", "python_ver", b"python_ver"],
    ) -> None: ...

global___PythonDataSource = PythonDataSource
| |
class CollectMetrics(google.protobuf.message.Message):
    """Collect arbitrary (named) metrics from a dataset."""

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Wire field numbers as declared in the .proto schema.
    INPUT_FIELD_NUMBER: builtins.int
    NAME_FIELD_NUMBER: builtins.int
    METRICS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) The input relation."""
    name: builtins.str
    """(Required) Name of the metrics."""
    @property
    def metrics(
        self,
    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
        pyspark.sql.connect.proto.expressions_pb2.Expression
    ]:
        """(Required) The metric sequence."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        name: builtins.str = ...,
        metrics: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
        | None = ...,
    ) -> None: ...
    def HasField(
        self, field_name: typing_extensions.Literal["input", b"input"]
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "input", b"input", "metrics", b"metrics", "name", b"name"
        ],
    ) -> None: ...

global___CollectMetrics = CollectMetrics
| |
class Parse(google.protobuf.message.Message):
    """Parse the single text column of the input relation as CSV or JSON
    (per `format`), optionally against a user supplied schema.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Enum wrapper scaffolding emitted by mypy-protobuf for the ParseFormat proto enum.
    class _ParseFormat:
        ValueType = typing.NewType("ValueType", builtins.int)
        V: typing_extensions.TypeAlias = ValueType

    class _ParseFormatEnumTypeWrapper(
        google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Parse._ParseFormat.ValueType],
        builtins.type,
    ):  # noqa: F821
        DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
        PARSE_FORMAT_UNSPECIFIED: Parse._ParseFormat.ValueType  # 0
        PARSE_FORMAT_CSV: Parse._ParseFormat.ValueType  # 1
        PARSE_FORMAT_JSON: Parse._ParseFormat.ValueType  # 2

    class ParseFormat(_ParseFormat, metaclass=_ParseFormatEnumTypeWrapper): ...
    PARSE_FORMAT_UNSPECIFIED: Parse.ParseFormat.ValueType  # 0
    PARSE_FORMAT_CSV: Parse.ParseFormat.ValueType  # 1
    PARSE_FORMAT_JSON: Parse.ParseFormat.ValueType  # 2

    class OptionsEntry(google.protobuf.message.Message):
        """Generated map-entry message for the `options` map field."""

        DESCRIPTOR: google.protobuf.descriptor.Descriptor

        KEY_FIELD_NUMBER: builtins.int
        VALUE_FIELD_NUMBER: builtins.int
        key: builtins.str
        value: builtins.str
        def __init__(
            self,
            *,
            key: builtins.str = ...,
            value: builtins.str = ...,
        ) -> None: ...
        def ClearField(
            self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]
        ) -> None: ...

    INPUT_FIELD_NUMBER: builtins.int
    FORMAT_FIELD_NUMBER: builtins.int
    SCHEMA_FIELD_NUMBER: builtins.int
    OPTIONS_FIELD_NUMBER: builtins.int
    @property
    def input(self) -> global___Relation:
        """(Required) Input relation to Parse. The input is expected to have single text column."""
    format: global___Parse.ParseFormat.ValueType
    """(Required) The expected format of the text."""
    @property
    def schema(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
        """(Optional) DataType representing the schema. If not set, Spark will infer the schema."""
    @property
    def options(self) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]:
        """Options for the csv/json parser. The map key is case insensitive."""
    def __init__(
        self,
        *,
        input: global___Relation | None = ...,
        format: global___Parse.ParseFormat.ValueType = ...,
        schema: pyspark.sql.connect.proto.types_pb2.DataType | None = ...,
        options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "_schema", b"_schema", "input", b"input", "schema", b"schema"
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "_schema",
            b"_schema",
            "format",
            b"format",
            "input",
            b"input",
            "options",
            b"options",
            "schema",
            b"schema",
        ],
    ) -> None: ...
    # "_schema" is the synthetic oneof backing the optional `schema` field.
    def WhichOneof(
        self, oneof_group: typing_extensions.Literal["_schema", b"_schema"]
    ) -> typing_extensions.Literal["schema"] | None: ...

global___Parse = Parse
| |
class AsOfJoin(google.protobuf.message.Message):
    """Relation of type [[AsOfJoin]].

    `left` and `right` must be present.
    """

    DESCRIPTOR: google.protobuf.descriptor.Descriptor

    # Wire field numbers as declared in the .proto schema.
    LEFT_FIELD_NUMBER: builtins.int
    RIGHT_FIELD_NUMBER: builtins.int
    LEFT_AS_OF_FIELD_NUMBER: builtins.int
    RIGHT_AS_OF_FIELD_NUMBER: builtins.int
    JOIN_EXPR_FIELD_NUMBER: builtins.int
    USING_COLUMNS_FIELD_NUMBER: builtins.int
    JOIN_TYPE_FIELD_NUMBER: builtins.int
    TOLERANCE_FIELD_NUMBER: builtins.int
    ALLOW_EXACT_MATCHES_FIELD_NUMBER: builtins.int
    DIRECTION_FIELD_NUMBER: builtins.int
    @property
    def left(self) -> global___Relation:
        """(Required) Left input relation for a Join."""
    @property
    def right(self) -> global___Relation:
        """(Required) Right input relation for a Join."""
    @property
    def left_as_of(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Required) Field to join on in left DataFrame"""
    @property
    def right_as_of(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Required) Field to join on in right DataFrame"""
    @property
    def join_expr(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Optional) The join condition. Could be unset when `using_columns` is utilized.

        This field does not co-exist with using_columns.
        """
    @property
    def using_columns(
        self,
    ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
        """Optional. using_columns provides a list of columns that should present on both sides of
        the join inputs that this Join will join on. For example A JOIN B USING col_name is
        equivalent to A JOIN B on A.col_name = B.col_name.

        This field does not co-exist with join_condition.
        """
    join_type: builtins.str
    """(Required) The join type."""
    @property
    def tolerance(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
        """(Optional) The asof tolerance within this range."""
    allow_exact_matches: builtins.bool
    """(Required) Whether allow matching with the same value or not."""
    direction: builtins.str
    """(Required) Whether to search for prior, subsequent, or closest matches."""
    def __init__(
        self,
        *,
        left: global___Relation | None = ...,
        right: global___Relation | None = ...,
        left_as_of: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        right_as_of: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        join_expr: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        using_columns: collections.abc.Iterable[builtins.str] | None = ...,
        join_type: builtins.str = ...,
        tolerance: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
        allow_exact_matches: builtins.bool = ...,
        direction: builtins.str = ...,
    ) -> None: ...
    def HasField(
        self,
        field_name: typing_extensions.Literal[
            "join_expr",
            b"join_expr",
            "left",
            b"left",
            "left_as_of",
            b"left_as_of",
            "right",
            b"right",
            "right_as_of",
            b"right_as_of",
            "tolerance",
            b"tolerance",
        ],
    ) -> builtins.bool: ...
    def ClearField(
        self,
        field_name: typing_extensions.Literal[
            "allow_exact_matches",
            b"allow_exact_matches",
            "direction",
            b"direction",
            "join_expr",
            b"join_expr",
            "join_type",
            b"join_type",
            "left",
            b"left",
            "left_as_of",
            b"left_as_of",
            "right",
            b"right",
            "right_as_of",
            b"right_as_of",
            "tolerance",
            b"tolerance",
            "using_columns",
            b"using_columns",
        ],
    ) -> None: ...

global___AsOfJoin = AsOfJoin