blob: d5904939b8d46bdd881c0bffb67c2ae98b9fa0b6 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Exercises SparkSession.createDataFrame with a list of tuples as data:
# without a schema, with a tuple of column names, with a StructType, with a
# DDL-style schema string, with a nested tuple value, and with samplingRatio
# passed alongside "no schema" and "column names".
# No `out` section is listed for this case, so presumably no type-checker
# diagnostics are expected from any of these calls.
- case: createDataFrameStructsValid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType
    spark = SparkSession.builder.getOrCreate()
    data = [('Alice', 1)]
    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True)
    ])
    # Valid structs
    spark.createDataFrame(data)
    spark.createDataFrame(data, samplingRatio=0.1)
    spark.createDataFrame(data, ("name", "age"))
    spark.createDataFrame(data, schema)
    spark.createDataFrame(data, "name string, age integer")
    spark.createDataFrame([(1, ("foo", "bar"))], ("_1", "_2"))
    spark.createDataFrame(data, ("name", "age"), samplingRatio=0.1)
# Exercises SparkSession.createDataFrame with lists of plain scalars (ints,
# strings) as data, once with an IntegerType() instance as the schema and once
# with the schema given as the string "string".
# No `out` section is listed for this case, so presumably no type-checker
# diagnostics are expected from either call.
- case: createDataFrameScalarsValid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType
    spark = SparkSession.builder.getOrCreate()
    # Scalars
    spark.createDataFrame([1, 2, 3], IntegerType())
    spark.createDataFrame(["foo", "bar"], "string")
# Negative case: both createDataFrame calls in `main` must be rejected by the
# type checker, with exactly the diagnostics listed under `out`.
#
# The blank lines inside `main` are load-bearing. `out` refers to main:14 and
# main:18, so the two offending calls have to sit on exactly those lines of
# the snippet. Do not add or remove lines in `main` without renumbering `out`.
#
# In `out`, the overload-variant notes keep mypy's four-space indent after
# "note:"; the expected text has to match the checker's output literally.
- case: createDataFrameStructsInvalid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    spark = SparkSession.builder.getOrCreate()

    data = [('Alice', 1)]

    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True)
    ])

    # Invalid product should have StructType schema
    spark.createDataFrame(data, IntegerType())

    # This shouldn't type check, though is technically speaking valid
    # because samplingRatio is ignored
    spark.createDataFrame(data, schema, samplingRatio=0.1)
  out: |
    main:14: error: Value of type variable "AtomicValue" of "createDataFrame" of "SparkSession" cannot be "tuple[str, int]" [type-var]
    main:18: error: No overload variant of "createDataFrame" of "SparkSession" matches argument types "list[tuple[str, int]]", "StructType", "float" [call-overload]
    main:18: note: Possible overload variants:
    main:18: note:     def [RowLike: (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: list[str] | tuple[str, ...] = ..., samplingRatio: float | None = ...) -> DataFrame
    main:18: note:     def [RowLike: (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: list[str] | tuple[str, ...] = ..., samplingRatio: float | None = ...) -> DataFrame
    main:18: note:     def [RowLike: (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: StructType | str, *, verifySchema: bool = ...) -> DataFrame
    main:18: note:     def [RowLike: (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: StructType | str, *, verifySchema: bool = ...) -> DataFrame
    main:18: note:     def [AtomicValue: (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: RDD[AtomicValue], schema: AtomicType | str, verifySchema: bool = ...) -> DataFrame
    main:18: note:     def [AtomicValue: (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: Iterable[AtomicValue], schema: AtomicType | str, verifySchema: bool = ...) -> DataFrame
    main:18: note:     def createDataFrame(self, data: DataFrame, samplingRatio: float | None = ...) -> DataFrame
    main:18: note:     def createDataFrame(self, data: Any, samplingRatio: float | None = ...) -> DataFrame
    main:18: note:     def createDataFrame(self, data: DataFrame, schema: StructType | str, verifySchema: bool = ...) -> DataFrame
    main:18: note:     def createDataFrame(self, data: Any, schema: StructType | str, verifySchema: bool = ...) -> DataFrame
# Exercises SparkSession.createDataFrame with the result of
# spark.sparkContext.emptyRDD() as data and an empty StructType() passed via
# the `schema` keyword.
# No `out` section is listed for this case, so presumably no type-checker
# diagnostics are expected from the call.
- case: createDataFrameFromEmptyRdd
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType
    spark = SparkSession.builder.getOrCreate()
    spark.createDataFrame(
        spark.sparkContext.emptyRDD(),
        schema=StructType(),
    )