[SPARK-49561][SQL] Add SQL pipe syntax for the PIVOT and UNPIVOT operators
### What changes were proposed in this pull request?
This PR adds SQL pipe syntax support for the PIVOT and UNPIVOT operators.
For example:
```
CREATE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
("dotNET", 2012, 10000),
("Java", 2012, 20000),
("dotNET", 2012, 5000),
("dotNET", 2013, 48000),
("Java", 2013, 30000)
as courseSales(course, year, earnings);
TABLE courseSales
|> SELECT `year`, course, earnings
|> PIVOT (
SUM(earnings)
FOR course IN ('dotNET', 'Java')
);
2012 15000 20000
2013 48000 30000
```
### Why are the changes needed?
The SQL pipe operator syntax will let users compose queries in a more flexible fashion.
### Does this PR introduce _any_ user-facing change?
Yes, see above.
### How was this patch tested?
This PR adds a few unit test cases, but mostly relies on golden file test coverage. I did this to make sure the answers are correct as this feature is implemented and also so we can look at the analyzer output plans to ensure they look right as well.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #48093 from dtenedor/pipe-pivot.
Authored-by: Daniel Tenedorio <daniel.tenedorio@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 866634b..33ac324 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -1499,6 +1499,11 @@
operatorPipeRightSide
: selectClause
| whereClause
+ // The following two cases match the PIVOT or UNPIVOT clause, respectively.
+ // For each one, we add the other clause as an option in order to return high-quality error
+ // messages in the event that both are present (this is not allowed).
+ | pivotClause unpivotClause?
+ | unpivotClause pivotClause?
;
// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ed6cf32..e235047 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -5893,7 +5893,17 @@
SubqueryAlias(SubqueryAlias.generateSubqueryName(), left)
}
withWhereClause(c, withSubqueryAlias)
- }.get)
+ }.getOrElse(Option(ctx.pivotClause()).map { c =>
+ if (ctx.unpivotClause() != null) {
+ throw QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
+ }
+ withPivot(c, left)
+ }.getOrElse(Option(ctx.unpivotClause()).map { c =>
+ if (ctx.pivotClause() != null) {
+ throw QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
+ }
+ withUnpivot(c, left)
+ }.get)))
}
/**
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
index c44ce15..8cd062a 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
@@ -63,6 +63,74 @@
-- !query
+create temporary view courseSales as select * from values
+ ("dotNET", 2012, 10000),
+ ("Java", 2012, 20000),
+ ("dotNET", 2012, 5000),
+ ("dotNET", 2013, 48000),
+ ("Java", 2013, 30000)
+ as courseSales(course, year, earnings)
+-- !query analysis
+CreateViewCommand `courseSales`, select * from values
+ ("dotNET", 2012, 10000),
+ ("Java", 2012, 20000),
+ ("dotNET", 2012, 5000),
+ ("dotNET", 2013, 48000),
+ ("Java", 2013, 30000)
+ as courseSales(course, year, earnings), false, false, LocalTempView, UNSUPPORTED, true
+ +- Project [course#x, year#x, earnings#x]
+ +- SubqueryAlias courseSales
+ +- LocalRelation [course#x, year#x, earnings#x]
+
+
+-- !query
+create temporary view courseEarnings as select * from values
+ ("dotNET", 15000, 48000, 22500),
+ ("Java", 20000, 30000, NULL)
+ as courseEarnings(course, `2012`, `2013`, `2014`)
+-- !query analysis
+CreateViewCommand `courseEarnings`, select * from values
+ ("dotNET", 15000, 48000, 22500),
+ ("Java", 20000, 30000, NULL)
+ as courseEarnings(course, `2012`, `2013`, `2014`), false, false, LocalTempView, UNSUPPORTED, true
+ +- Project [course#x, 2012#x, 2013#x, 2014#x]
+ +- SubqueryAlias courseEarnings
+ +- LocalRelation [course#x, 2012#x, 2013#x, 2014#x]
+
+
+-- !query
+create temporary view courseEarningsAndSales as select * from values
+ ("dotNET", 15000, NULL, 48000, 1, 22500, 1),
+ ("Java", 20000, 1, 30000, 2, NULL, NULL)
+ as courseEarningsAndSales(
+ course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014)
+-- !query analysis
+CreateViewCommand `courseEarningsAndSales`, select * from values
+ ("dotNET", 15000, NULL, 48000, 1, 22500, 1),
+ ("Java", 20000, 1, 30000, 2, NULL, NULL)
+ as courseEarningsAndSales(
+ course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014), false, false, LocalTempView, UNSUPPORTED, true
+ +- Project [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x]
+ +- SubqueryAlias courseEarningsAndSales
+ +- LocalRelation [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x]
+
+
+-- !query
+create temporary view yearsWithComplexTypes as select * from values
+ (2012, array(1, 1), map('1', 1), struct(1, 'a')),
+ (2013, array(2, 2), map('2', 2), struct(2, 'b'))
+ as yearsWithComplexTypes(y, a, m, s)
+-- !query analysis
+CreateViewCommand `yearsWithComplexTypes`, select * from values
+ (2012, array(1, 1), map('1', 1), struct(1, 'a')),
+ (2013, array(2, 2), map('2', 2), struct(2, 'b'))
+ as yearsWithComplexTypes(y, a, m, s), false, false, LocalTempView, UNSUPPORTED, true
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]
+
+
+-- !query
table t
|> select 1 as x
-- !query analysis
@@ -570,6 +638,290 @@
-- !query
+table courseSales
+|> select `year`, course, earnings
+|> pivot (
+ sum(earnings)
+ for course in ('dotNET', 'Java')
+ )
+-- !query analysis
+Project [year#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS dotNET#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS Java#xL]
++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(coursesales.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x]
+ +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(coursesales.earnings)#xL]
+ +- Project [year#x, course#x, earnings#x]
+ +- SubqueryAlias coursesales
+ +- View (`courseSales`, [course#x, year#x, earnings#x])
+ +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ +- Project [course#x, year#x, earnings#x]
+ +- SubqueryAlias courseSales
+ +- LocalRelation [course#x, year#x, earnings#x]
+
+
+-- !query
+table courseSales
+|> select `year` as y, course as c, earnings as e
+|> pivot (
+ sum(e) as s, avg(e) as a
+ for y in (2012 as firstYear, 2013 as secondYear)
+ )
+-- !query analysis
+Project [c#x, __pivot_sum(e) AS s AS `sum(e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(e) AS a AS `avg(e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(e) AS s AS `sum(e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(e) AS a AS `avg(e) AS a`#x[1] AS secondYear_a#x]
++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(e) AS s AS `sum(e) AS s`#x, pivotfirst(y#x, avg(e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(e) AS a AS `avg(e) AS a`#x]
+ +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(e) AS s#xL, avg(e#x) AS avg(e) AS a#x]
+ +- Project [pipeselect(year#x) AS y#x, pipeselect(course#x) AS c#x, pipeselect(earnings#x) AS e#x]
+ +- SubqueryAlias coursesales
+ +- View (`courseSales`, [course#x, year#x, earnings#x])
+ +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ +- Project [course#x, year#x, earnings#x]
+ +- SubqueryAlias courseSales
+ +- LocalRelation [course#x, year#x, earnings#x]
+
+
+-- !query
+select course, `year`, y, a
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ max(a)
+ for (y, course) in ((2012, 'dotNET'), (2013, 'Java'))
+ )
+-- !query analysis
+Aggregate [year#x], [year#x, max(if ((named_struct(y, y#x, course, course#x) <=> cast(named_struct(col1, 2012, col2, dotNET) as struct<y:int,course:string>))) a#x else cast(null as array<int>)) AS {2012, dotNET}#x, max(if ((named_struct(y, y#x, course, course#x) <=> cast(named_struct(col1, 2013, col2, Java) as struct<y:int,course:string>))) a#x else cast(null as array<int>)) AS {2013, Java}#x]
++- Project [course#x, year#x, y#x, a#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]
+
+
+-- !query
+select earnings, `year`, s
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ sum(earnings)
+ for s in ((1, 'a'), (2, 'b'))
+ )
+-- !query analysis
+Project [year#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS {2, b}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(coursesales.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x]
+ +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(coursesales.earnings)#xL]
+ +- Project [earnings#x, year#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]
+
+
+-- !query
+table courseEarnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query analysis
+Filter isnotnull(coalesce(earningsYear#x))
++- Expand [[course#x, 2012, 2012#x], [course#x, 2013, 2013#x], [course#x, 2014, 2014#x]], [course#x, year#x, earningsYear#x]
+ +- SubqueryAlias courseearnings
+ +- View (`courseEarnings`, [course#x, 2012#x, 2013#x, 2014#x])
+ +- Project [cast(course#x as string) AS course#x, cast(2012#x as int) AS 2012#x, cast(2013#x as int) AS 2013#x, cast(2014#x as int) AS 2014#x]
+ +- Project [course#x, 2012#x, 2013#x, 2014#x]
+ +- SubqueryAlias courseEarnings
+ +- LocalRelation [course#x, 2012#x, 2013#x, 2014#x]
+
+
+-- !query
+table courseEarnings
+|> unpivot include nulls (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query analysis
+Expand [[course#x, 2012, 2012#x], [course#x, 2013, 2013#x], [course#x, 2014, 2014#x]], [course#x, year#x, earningsYear#x]
++- SubqueryAlias courseearnings
+ +- View (`courseEarnings`, [course#x, 2012#x, 2013#x, 2014#x])
+ +- Project [cast(course#x as string) AS course#x, cast(2012#x as int) AS 2012#x, cast(2013#x as int) AS 2013#x, cast(2014#x as int) AS 2014#x]
+ +- Project [course#x, 2012#x, 2013#x, 2014#x]
+ +- SubqueryAlias courseEarnings
+ +- LocalRelation [course#x, 2012#x, 2013#x, 2014#x]
+
+
+-- !query
+table courseEarningsAndSales
+|> unpivot include nulls (
+ (earnings, sales) for `year` in (
+ (earnings2012, sales2012) as `2012`,
+ (earnings2013, sales2013) as `2013`,
+ (earnings2014, sales2014) as `2014`)
+ )
+-- !query analysis
+Expand [[course#x, 2012, earnings2012#x, sales2012#x], [course#x, 2013, earnings2013#x, sales2013#x], [course#x, 2014, earnings2014#x, sales2014#x]], [course#x, year#x, earnings#x, sales#x]
++- SubqueryAlias courseearningsandsales
+ +- View (`courseEarningsAndSales`, [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x])
+ +- Project [cast(course#x as string) AS course#x, cast(earnings2012#x as int) AS earnings2012#x, cast(sales2012#x as int) AS sales2012#x, cast(earnings2013#x as int) AS earnings2013#x, cast(sales2013#x as int) AS sales2013#x, cast(earnings2014#x as int) AS earnings2014#x, cast(sales2014#x as int) AS sales2014#x]
+ +- Project [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x]
+ +- SubqueryAlias courseEarningsAndSales
+ +- LocalRelation [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x]
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`year`",
+ "proposal" : "`course`, `earnings`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 49,
+ "stopIndex" : 111,
+ "fragment" : "pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> pivot (
+ sum(earnings)
+ for `year` in (course, 2013)
+ )
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NON_LITERAL_PIVOT_VALUES",
+ "sqlState" : "42K08",
+ "messageParameters" : {
+ "expression" : "\"course\""
+ }
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "NOT_ALLOWED_IN_FROM.UNPIVOT_WITH_PIVOT",
+ "sqlState" : "42601",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 186,
+ "fragment" : "table courseSales\n|> select course, earnings\n|> pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )\n unpivot (\n earningsYear for `year` in (`2012`, `2013`, `2014`)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "NOT_ALLOWED_IN_FROM.UNPIVOT_WITH_PIVOT",
+ "sqlState" : "42601",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 186,
+ "fragment" : "table courseSales\n|> select course, earnings\n|> unpivot (\n earningsYear for `year` in (`2012`, `2013`, `2014`)\n )\n pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'pivot'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'unpivot'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
drop table t
-- !query analysis
DropTable false, false
diff --git a/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql b/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql
index 49a7213..3aa01d4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql
@@ -12,6 +12,30 @@
create table st(x int, col struct<i1:int, i2:int>) using parquet;
insert into st values (1, (2, 3));
+create temporary view courseSales as select * from values
+ ("dotNET", 2012, 10000),
+ ("Java", 2012, 20000),
+ ("dotNET", 2012, 5000),
+ ("dotNET", 2013, 48000),
+ ("Java", 2013, 30000)
+ as courseSales(course, year, earnings);
+
+create temporary view courseEarnings as select * from values
+ ("dotNET", 15000, 48000, 22500),
+ ("Java", 20000, 30000, NULL)
+ as courseEarnings(course, `2012`, `2013`, `2014`);
+
+create temporary view courseEarningsAndSales as select * from values
+ ("dotNET", 15000, NULL, 48000, 1, 22500, 1),
+ ("Java", 20000, 1, 30000, 2, NULL, NULL)
+ as courseEarningsAndSales(
+ course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014);
+
+create temporary view yearsWithComplexTypes as select * from values
+ (2012, array(1, 1), map('1', 1), struct(1, 'a')),
+ (2013, array(2, 2), map('2', 2), struct(2, 'b'))
+ as yearsWithComplexTypes(y, a, m, s);
+
-- SELECT operators: positive tests.
---------------------------------------
@@ -185,6 +209,123 @@
(select x, sum(length(y)) as sum_len from t group by x)
|> where sum(length(y)) = 3;
+-- Pivot and unpivot operators: positive tests.
+-----------------------------------------------
+
+table courseSales
+|> select `year`, course, earnings
+|> pivot (
+ sum(earnings)
+ for course in ('dotNET', 'Java')
+ );
+
+table courseSales
+|> select `year` as y, course as c, earnings as e
+|> pivot (
+ sum(e) as s, avg(e) as a
+ for y in (2012 as firstYear, 2013 as secondYear)
+ );
+
+-- Pivot on multiple pivot columns with aggregate columns of complex data types.
+select course, `year`, y, a
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ max(a)
+ for (y, course) in ((2012, 'dotNET'), (2013, 'Java'))
+ );
+
+-- Pivot on pivot column of struct type.
+select earnings, `year`, s
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ sum(earnings)
+ for s in ((1, 'a'), (2, 'b'))
+ );
+
+table courseEarnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ );
+
+table courseEarnings
+|> unpivot include nulls (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ );
+
+table courseEarningsAndSales
+|> unpivot include nulls (
+ (earnings, sales) for `year` in (
+ (earnings2012, sales2012) as `2012`,
+ (earnings2013, sales2013) as `2013`,
+ (earnings2014, sales2014) as `2014`)
+ );
+
+-- Pivot and unpivot operators: negative tests.
+-----------------------------------------------
+
+-- The PIVOT operator refers to a column 'year' is not available in the input relation.
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ );
+
+-- Non-literal PIVOT values are not supported.
+table courseSales
+|> pivot (
+ sum(earnings)
+ for `year` in (course, 2013)
+ );
+
+-- The PIVOT and UNPIVOT clauses are mutually exclusive.
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ );
+
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ );
+
+-- Multiple PIVOT and/or UNPIVOT clauses are not supported in the same pipe operator.
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ );
+
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ );
+
-- Cleanup.
-----------
drop table t;
diff --git a/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out b/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out
index 38436b0..2c6abe2 100644
--- a/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out
@@ -72,6 +72,54 @@
-- !query
+create temporary view courseSales as select * from values
+ ("dotNET", 2012, 10000),
+ ("Java", 2012, 20000),
+ ("dotNET", 2012, 5000),
+ ("dotNET", 2013, 48000),
+ ("Java", 2013, 30000)
+ as courseSales(course, year, earnings)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+create temporary view courseEarnings as select * from values
+ ("dotNET", 15000, 48000, 22500),
+ ("Java", 20000, 30000, NULL)
+ as courseEarnings(course, `2012`, `2013`, `2014`)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+create temporary view courseEarningsAndSales as select * from values
+ ("dotNET", 15000, NULL, 48000, 1, 22500, 1),
+ ("Java", 20000, 1, 30000, 2, NULL, NULL)
+ as courseEarningsAndSales(
+ course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+create temporary view yearsWithComplexTypes as select * from values
+ (2012, array(1, 1), map('1', 1), struct(1, 'a')),
+ (2013, array(2, 2), map('2', 2), struct(2, 'b'))
+ as yearsWithComplexTypes(y, a, m, s)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
table t
|> select 1 as x
-- !query schema
@@ -553,6 +601,267 @@
-- !query
+table courseSales
+|> select `year`, course, earnings
+|> pivot (
+ sum(earnings)
+ for course in ('dotNET', 'Java')
+ )
+-- !query schema
+struct<year:int,dotNET:bigint,Java:bigint>
+-- !query output
+2012 15000 20000
+2013 48000 30000
+
+
+-- !query
+table courseSales
+|> select `year` as y, course as c, earnings as e
+|> pivot (
+ sum(e) as s, avg(e) as a
+ for y in (2012 as firstYear, 2013 as secondYear)
+ )
+-- !query schema
+struct<c:string,firstYear_s:bigint,firstYear_a:double,secondYear_s:bigint,secondYear_a:double>
+-- !query output
+Java 20000 20000.0 30000 30000.0
+dotNET 15000 7500.0 48000 48000.0
+
+
+-- !query
+select course, `year`, y, a
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ max(a)
+ for (y, course) in ((2012, 'dotNET'), (2013, 'Java'))
+ )
+-- !query schema
+struct<year:int,{2012, dotNET}:array<int>,{2013, Java}:array<int>>
+-- !query output
+2012 [1,1] NULL
+2013 NULL [2,2]
+
+
+-- !query
+select earnings, `year`, s
+from courseSales
+join yearsWithComplexTypes on `year` = y
+|> pivot (
+ sum(earnings)
+ for s in ((1, 'a'), (2, 'b'))
+ )
+-- !query schema
+struct<year:int,{1, a}:bigint,{2, b}:bigint>
+-- !query output
+2012 35000 NULL
+2013 NULL 78000
+
+
+-- !query
+table courseEarnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query schema
+struct<course:string,year:string,earningsYear:int>
+-- !query output
+Java 2012 20000
+Java 2013 30000
+dotNET 2012 15000
+dotNET 2013 48000
+dotNET 2014 22500
+
+
+-- !query
+table courseEarnings
+|> unpivot include nulls (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query schema
+struct<course:string,year:string,earningsYear:int>
+-- !query output
+Java 2012 20000
+Java 2013 30000
+Java 2014 NULL
+dotNET 2012 15000
+dotNET 2013 48000
+dotNET 2014 22500
+
+
+-- !query
+table courseEarningsAndSales
+|> unpivot include nulls (
+ (earnings, sales) for `year` in (
+ (earnings2012, sales2012) as `2012`,
+ (earnings2013, sales2013) as `2013`,
+ (earnings2014, sales2014) as `2014`)
+ )
+-- !query schema
+struct<course:string,year:string,earnings:int,sales:int>
+-- !query output
+Java 2012 20000 1
+Java 2013 30000 2
+Java 2014 NULL NULL
+dotNET 2012 15000 NULL
+dotNET 2013 48000 1
+dotNET 2014 22500 1
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`year`",
+ "proposal" : "`course`, `earnings`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 49,
+ "stopIndex" : 111,
+ "fragment" : "pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> pivot (
+ sum(earnings)
+ for `year` in (course, 2013)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "NON_LITERAL_PIVOT_VALUES",
+ "sqlState" : "42K08",
+ "messageParameters" : {
+ "expression" : "\"course\""
+ }
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "NOT_ALLOWED_IN_FROM.UNPIVOT_WITH_PIVOT",
+ "sqlState" : "42601",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 186,
+ "fragment" : "table courseSales\n|> select course, earnings\n|> pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )\n unpivot (\n earningsYear for `year` in (`2012`, `2013`, `2014`)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "NOT_ALLOWED_IN_FROM.UNPIVOT_WITH_PIVOT",
+ "sqlState" : "42601",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 186,
+ "fragment" : "table courseSales\n|> select course, earnings\n|> unpivot (\n earningsYear for `year` in (`2012`, `2013`, `2014`)\n )\n pivot (\n sum(earnings)\n for `year` in (2012, 2013)\n )"
+ } ]
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'pivot'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+table courseSales
+|> select course, earnings
+|> unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ unpivot (
+ earningsYear for `year` in (`2012`, `2013`, `2014`)
+ )
+ pivot (
+ sum(earnings)
+ for `year` in (2012, 2013)
+ )
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'unpivot'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
drop table t
-- !query schema
struct<>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
index ab949c5..1111a65 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
@@ -26,7 +26,7 @@
import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, GreaterThan, Literal, NullsFirst, SortOrder, UnresolvedWindowExpression, UnspecifiedFrame, WindowSpecDefinition, WindowSpecReference}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.trees.TreePattern.{FILTER, LOCAL_RELATION, PROJECT, UNRESOLVED_RELATION}
+import org.apache.spark.sql.catalyst.trees.TreePattern._
import org.apache.spark.sql.connector.catalog.TableCatalog
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, RefreshResource}
@@ -887,24 +887,47 @@
// Basic selection.
// Here we check that every parsed plan contains a projection and a source relation or
// inline table.
- def checkPipeSelect(query: String): Unit = {
+ def check(query: String, patterns: Seq[TreePattern]): Unit = {
val plan: LogicalPlan = parser.parsePlan(query)
- assert(plan.containsPattern(PROJECT))
+ assert(patterns.exists(plan.containsPattern))
assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION))
}
+ def checkPipeSelect(query: String): Unit = check(query, Seq(PROJECT))
checkPipeSelect("TABLE t |> SELECT 1 AS X")
checkPipeSelect("TABLE t |> SELECT 1 AS X, 2 AS Y |> SELECT X + Y AS Z")
checkPipeSelect("VALUES (0), (1) tab(col) |> SELECT col * 2 AS result")
// Basic WHERE operators.
- def checkPipeWhere(query: String): Unit = {
- val plan: LogicalPlan = parser.parsePlan(query)
- assert(plan.containsPattern(FILTER))
- assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION))
- }
+ def checkPipeWhere(query: String): Unit = check(query, Seq(FILTER))
checkPipeWhere("TABLE t |> WHERE X = 1")
checkPipeWhere("TABLE t |> SELECT X, LENGTH(Y) AS Z |> WHERE X + LENGTH(Y) < 4")
checkPipeWhere("TABLE t |> WHERE X = 1 AND Y = 2 |> WHERE X + Y = 3")
checkPipeWhere("VALUES (0), (1) tab(col) |> WHERE col < 1")
+ // PIVOT and UNPIVOT operations
+ def checkPivotUnpivot(query: String): Unit = check(query, Seq(PIVOT, UNPIVOT))
+ checkPivotUnpivot(
+ """
+ |SELECT * FROM VALUES
+ | ("dotNET", 2012, 10000),
+ | ("Java", 2012, 20000),
+ | ("dotNET", 2012, 5000),
+ | ("dotNET", 2013, 48000),
+ | ("Java", 2013, 30000)
+ | AS courseSales(course, year, earnings)
+ ||> PIVOT (
+ | SUM(earnings)
+ | FOR course IN ('dotNET', 'Java')
+ |)
+ |""".stripMargin)
+ checkPivotUnpivot(
+ """
+ |SELECT * FROM VALUES
+ | ("dotNET", 15000, 48000, 22500),
+ | ("Java", 20000, 30000, NULL)
+ | AS courseEarnings(course, `2012`, `2013`, `2014`)
+ ||> UNPIVOT (
+ | earningsYear FOR year IN (`2012`, `2013`, `2014`)
+ |)
+ |""".stripMargin)
}
}
}