#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Positive cases: tuple ("struct") rows with every schema form the stubs
# accept — omitted, column-name sequence, StructType, DDL string — and
# samplingRatio combined with the name-sequence form.
- case: createDataFrameStructsValid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    spark = SparkSession.builder.getOrCreate()

    data = [('Alice', 1)]

    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True)
    ])

    # Valid structs
    spark.createDataFrame(data)
    spark.createDataFrame(data, samplingRatio=0.1)
    spark.createDataFrame(data, ("name", "age"))
    spark.createDataFrame(data, schema)
    spark.createDataFrame(data, "name string, age integer")
    spark.createDataFrame([(1, ("foo", "bar"))], ("_1", "_2"))
    spark.createDataFrame(data, ("name", "age"), samplingRatio=0.1)
# Positive cases: atomic (scalar) rows, schema given as an AtomicType
# instance or as a DDL type string.
- case: createDataFrameScalarsValid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    spark = SparkSession.builder.getOrCreate()

    # Scalars
    spark.createDataFrame([1, 2, 3], IntegerType())
    spark.createDataFrame(["foo", "bar"], "string")
# Negative cases: the expected mypy output below is keyed to line numbers
# inside this main block (main:14 and main:18), so its layout — including
# blank lines — must stay exactly as written.
- case: createDataFrameStructsInvalid
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    spark = SparkSession.builder.getOrCreate()

    data = [('Alice', 1)]

    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True)
    ])

    # Invalid product should have StructType schema
    spark.createDataFrame(data, IntegerType())

    # This shouldn't type check, though is technically speaking valid
    # because samplingRatio is ignored
    spark.createDataFrame(data, schema, samplingRatio=0.1)
  out: |
    main:14: error: Value of type variable "AtomicValue" of "createDataFrame" of "SparkSession" cannot be "tuple[str, int]" [type-var]
    main:18: error: No overload variant of "createDataFrame" of "SparkSession" matches argument types "list[tuple[str, int]]", "StructType", "float" [call-overload]
    main:18: note: Possible overload variants:
    main:18: note: def [RowLike in (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[list[str], tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame
    main:18: note: def [RowLike in (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[list[str], tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame
    main:18: note: def [RowLike in (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) -> DataFrame
    main:18: note: def [RowLike in (list[Any], tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) -> DataFrame
    main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: RDD[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame
    main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: Iterable[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame
    main:18: note: def createDataFrame(self, data: DataFrame, samplingRatio: Optional[float] = ...) -> DataFrame
    main:18: note: def createDataFrame(self, data: Any, samplingRatio: Optional[float] = ...) -> DataFrame
    main:18: note: def createDataFrame(self, data: DataFrame, schema: Union[StructType, str], verifySchema: bool = ...) -> DataFrame
    main:18: note: def createDataFrame(self, data: Any, schema: Union[StructType, str], verifySchema: bool = ...) -> DataFrame
# Positive case: an empty RDD (emptyRDD() yields RDD[Any]) paired with an
# explicit StructType schema must type-check.
- case: createDataFrameFromEmptyRdd
  main: |
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType

    spark = SparkSession.builder.getOrCreate()

    spark.createDataFrame(
        spark.sparkContext.emptyRDD(),
        schema=StructType(),
    )