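df.flatten()
flattens the nested schema of df into a flat one: every field of a nested struct
becomes a top-level column whose name is the path to that field, joined by the
separator (e.g. _2$_2$_1 when the separator is "$").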
scala> val df = Seq((0, (1, (3.0, "a")), (5, 0.9))).toDF()

scala> df.printSchema
root
 |-- _1: integer (nullable = false)
 |-- _2: struct (nullable = true)
 |    |-- _1: integer (nullable = false)
 |    |-- _2: struct (nullable = true)
 |    |    |-- _1: double (nullable = false)
 |    |    |-- _2: string (nullable = true)
 |-- _3: struct (nullable = true)
 |    |-- _1: integer (nullable = false)
 |    |-- _2: double (nullable = false)

scala> df.flatten(separator = "$").printSchema
root
 |-- _1: integer (nullable = false)
 |-- _2$_1: integer (nullable = true)
 |-- _2$_2$_1: double (nullable = true)
 |-- _2$_2$_2: string (nullable = true)
 |-- _3$_1: integer (nullable = true)
 |-- _3$_2: double (nullable = true)
from_csv parses a column containing a CSV string into a StructType
with the specified schema.
scala> val df = Seq("1, abc, 0.8").toDF()

scala> df.printSchema
root
 |-- value: string (nullable = true)

scala> val schema = new StructType().add("a", IntegerType).add("b", StringType).add("c", DoubleType)

scala> df.select(from_csv($"value", schema)).printSchema
root
 |-- csvtostruct(value): struct (nullable = true)
 |    |-- a: integer (nullable = true)
 |    |-- b: string (nullable = true)
 |    |-- c: double (nullable = true)

scala> df.select(from_csv($"value", schema)).show
+------------------+
|csvtostruct(value)|
+------------------+
|      [1, abc,0.8]|
+------------------+
to_csv converts a column containing a StructType
into a CSV string that follows the struct's own schema
(one comma-separated value per struct field).
scala> val df = Seq((1, "a", (0, 3.9, "abc")), (8, "c", (2, 0.4, "def"))).toDF()

scala> df.printSchema
root
 |-- _1: integer (nullable = false)
 |-- _2: string (nullable = true)
 |-- _3: struct (nullable = true)
 |    |-- _1: integer (nullable = false)
 |    |-- _2: double (nullable = false)
 |    |-- _3: string (nullable = true)

scala> df.select(to_csv($"_3"))

scala> df.select(to_csv($"_3")).printSchema
root
 |-- structtocsv(_3): string (nullable = true)

scala> df.select(to_csv($"_3")).show
+---------------+
|structtocsv(_3)|
+---------------+
|      0,3.9,abc|
|      2,0.4,def|
+---------------+