| # the hash partition for aggregation is adjusted if subsequent analytic computation |
| # can take advantage of it |
| select |
| max(tinyint_col) over(partition by int_col) |
| from functional.alltypes |
| group by int_col, tinyint_col |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | |
| 03:ANALYTIC |
| | functions: max(tinyint_col) |
| | partition by: int_col |
| | |
| 02:SORT |
| | order by: int_col ASC NULLS FIRST |
| | |
| 05:AGGREGATE [FINALIZE] |
| | group by: int_col, tinyint_col |
| | |
| 04:EXCHANGE [HASH(int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: int_col, tinyint_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # partition groups are coalesced if the intersection of their partition exprs |
| # has a high enough cardinality to allow distribution across all nodes |
| select |
| max(int_col) over(partition by int_col, bool_col), |
| max(int_col) over(partition by int_col, tinyint_col) |
| from functional.alltypes |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | |
| 04:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col, tinyint_col |
| | |
| 03:SORT |
| | order by: int_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col, bool_col |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, bool_col ASC NULLS FIRST |
| | |
| 05:EXCHANGE [HASH(int_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # unpartitioned analytics are executed with distributed sorts |
| # TODO: avoid resorting on the same exprs |
| select |
| max(int_col) over(partition by int_col), |
| min(int_col) over(order by int_col) |
| from functional.alltypes |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:ANALYTIC |
| | functions: min(int_col) |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:ANALYTIC |
| | functions: min(int_col) |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: int_col ASC |
| | |
| 03:SORT |
| | order by: int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST |
| | |
| 05:EXCHANGE [HASH(int_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # coalesce sort groups |
| select |
| # sort group 1 |
| max(int_col) over(partition by bool_col order by bigint_col, tinyint_col), |
| max(int_col) over(partition by bool_col order by bigint_col), |
| max(int_col) over(partition by bool_col), |
| |
| # sort group 2 |
| max(int_col) over(partition by int_col order by bigint_col), |
| |
| # sort group 3 |
| max(int_col) over(partition by int_col order by bigint_col desc) |
| from functional.alltypes |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 08:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | |
| 07:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC, tinyint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: bool_col ASC NULLS FIRST, bigint_col ASC, tinyint_col ASC |
| | |
| 04:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | order by: bigint_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: int_col ASC NULLS FIRST, bigint_col DESC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 11:EXCHANGE [UNPARTITIONED] |
| | |
| 08:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | |
| 07:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC, tinyint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: bool_col ASC NULLS FIRST, bigint_col ASC, tinyint_col ASC |
| | |
| 10:EXCHANGE [HASH(bool_col)] |
| | |
| 04:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | order by: bigint_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: int_col ASC NULLS FIRST, bigint_col DESC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 09:EXCHANGE [HASH(int_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # check ordering of partition, sort and window groups |
| select |
| # non-partitioning group with 2 sort groups |
| # 2nd sort group |
| min(int_col) over(order by bigint_col), |
| max(int_col) over(order by bigint_col), |
| # 1st sort group |
| min(int_col) over(), |
| |
| # 2nd partition group, with two sort groups |
| # 2nd sort group |
| max(int_col) over(partition by bool_col order by bigint_col), |
| min(int_col) over(partition by bool_col order by bigint_col), |
| count(int_col) over(partition by bool_col order by bigint_col), |
| # 1st sort group |
| max(int_col) over(partition by bool_col order by int_col), |
| |
| # 1st partition group, with two sort groups |
| # 2nd sort group |
| max(int_col) over(partition by int_col, smallint_col order by bigint_col), |
| min(int_col) over(partition by int_col, smallint_col order by bigint_col), |
| # 1st sort group |
| max(int_col) over(partition by int_col, smallint_col order by int_col) |
| from functional.alltypes |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 11:ANALYTIC |
| | functions: min(int_col), max(int_col) |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 10:ANALYTIC |
| | functions: min(int_col) |
| | |
| 09:SORT |
| | order by: bigint_col ASC |
| | |
| 08:ANALYTIC |
| | functions: max(int_col), min(int_col), count(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 07:SORT |
| | order by: bool_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 06:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: bool_col ASC NULLS FIRST, int_col ASC |
| | |
| 04:ANALYTIC |
| | functions: max(int_col), min(int_col) |
| | partition by: int_col, smallint_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col, smallint_col |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 11:ANALYTIC |
| | functions: min(int_col), max(int_col) |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 10:ANALYTIC |
| | functions: min(int_col) |
| | |
| 14:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | |
| 09:SORT |
| | order by: bigint_col ASC |
| | |
| 08:ANALYTIC |
| | functions: max(int_col), min(int_col), count(int_col) |
| | partition by: bool_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 07:SORT |
| | order by: bool_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 06:ANALYTIC |
| | functions: max(int_col) |
| | partition by: bool_col |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: bool_col ASC NULLS FIRST, int_col ASC |
| | |
| 13:EXCHANGE [HASH(bool_col)] |
| | |
| 04:ANALYTIC |
| | functions: max(int_col), min(int_col) |
| | partition by: int_col, smallint_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col) |
| | partition by: int_col, smallint_col |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC |
| | |
| 12:EXCHANGE [HASH(int_col,smallint_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # basic analytic with default window and no partition/ordering |
| select count(*) over() from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:ANALYTIC |
| | functions: count(*) |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 01:ANALYTIC |
| | functions: count(*) |
| | |
| 02:EXCHANGE [UNPARTITIONED] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # basic analytic with default window and partition |
| select tinyint_col, sum(bigint_col) over(partition by tinyint_col) sum_of_bigints |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: sum(bigint_col) |
| | partition by: tinyint_col |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 02:ANALYTIC |
| | functions: sum(bigint_col) |
| | partition by: tinyint_col |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST |
| | |
| 03:EXCHANGE [HASH(tinyint_col)] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # basic analytic with default window and ordering |
| select int_col, rank() over(order by int_col) from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: rank() |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: rank() |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: int_col ASC |
| | |
| 01:SORT |
| | order by: int_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # analytic rows window, partition and ordering using complex expressions, with limit |
| select bigint_col, count(double_col) |
| over(partition by tinyint_col + 1, double_col / 2 order by 4 - int_col, 4 * smallint_col |
| rows between 1 preceding and 1 following) |
| from functional.alltypesagg |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: count(double_col) |
| | partition by: tinyint_col + 1, double_col / 2 |
| | order by: 4 - int_col ASC, 4 * smallint_col ASC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | limit: 10 |
| | |
| 01:SORT |
| | order by: tinyint_col + 1 ASC NULLS FIRST, double_col / 2 ASC NULLS FIRST, 4 - int_col ASC, 4 * smallint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 02:ANALYTIC |
| | functions: count(double_col) |
| | partition by: tinyint_col + 1, double_col / 2 |
| | order by: 4 - int_col ASC, 4 * smallint_col ASC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | limit: 10 |
| | |
| 01:SORT |
| | order by: tinyint_col + 1 ASC NULLS FIRST, double_col / 2 ASC NULLS FIRST, 4 - int_col ASC, 4 * smallint_col ASC |
| | |
| 03:EXCHANGE [HASH(tinyint_col + 1,double_col / 2)] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # test de-duplication of analytic exprs |
| select |
| count(bigint_col) |
| over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following), |
| avg(double_col) |
| over(partition by bool_col order by int_col desc rows between 2 preceding and 2 following), |
| sum(double_col) |
| over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following), |
| # duplicate analytic expr |
| count(bigint_col) |
| over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following), |
| avg(double_col) |
| over(order by int_col desc rows between 1 preceding and 1 following), |
| # duplicate analytic expr |
| avg(double_col) |
| over(partition by bool_col order by int_col desc rows between 2 preceding and 2 following) |
| from functional.alltypes |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:ANALYTIC |
| | functions: avg(double_col) |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | limit: 10 |
| | |
| 04:SORT |
| | order by: int_col DESC |
| | |
| 03:ANALYTIC |
| | functions: count(bigint_col), sum(double_col) |
| | partition by: bool_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 02:ANALYTIC |
| | functions: avg(double_col) |
| | partition by: bool_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST, int_col DESC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 05:ANALYTIC |
| | functions: avg(double_col) |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | limit: 10 |
| | |
| 07:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: int_col DESC |
| | |
| 04:SORT |
| | order by: int_col DESC |
| | |
| 03:ANALYTIC |
| | functions: count(bigint_col), sum(double_col) |
| | partition by: bool_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 02:ANALYTIC |
| | functions: avg(double_col) |
| | partition by: bool_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST, int_col DESC |
| | |
| 06:EXCHANGE [HASH(bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # analytic on the output of a join with a final order by |
| select a.tinyint_col, a.int_col, count(a.double_col) |
| over(partition by a.tinyint_col order by a.int_col desc rows between 1 preceding and 1 following) |
| from functional.alltypes a inner join functional.alltypessmall b on a.id = b.id |
| order by a.tinyint_col, a.int_col |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:SORT |
| | order by: tinyint_col ASC, int_col ASC |
| | |
| 04:ANALYTIC |
| | functions: count(double_col) |
| | partition by: a.tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, int_col DESC |
| | |
| 02:HASH JOIN [INNER JOIN] |
| | hash predicates: a.id = b.id |
| | runtime filters: RF000 <- b.id |
| | |
| |--01:SCAN HDFS [functional.alltypessmall b] |
| | partitions=4/4 files=4 size=6.32KB |
| | |
| 00:SCAN HDFS [functional.alltypes a] |
| partitions=24/24 files=24 size=478.45KB |
| runtime filters: RF000 -> a.id |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: tinyint_col ASC, int_col ASC |
| | |
| 05:SORT |
| | order by: tinyint_col ASC, int_col ASC |
| | |
| 04:ANALYTIC |
| | functions: count(double_col) |
| | partition by: a.tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, int_col DESC |
| | |
| 07:EXCHANGE [HASH(a.tinyint_col)] |
| | |
| 02:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: a.id = b.id |
| | runtime filters: RF000 <- b.id |
| | |
| |--06:EXCHANGE [BROADCAST] |
| | | |
| | 01:SCAN HDFS [functional.alltypessmall b] |
| | partitions=4/4 files=4 size=6.32KB |
| | |
| 00:SCAN HDFS [functional.alltypes a] |
| partitions=24/24 files=24 size=478.45KB |
| runtime filters: RF000 -> a.id |
| ==== |
| # analytics on a grouped aggregation with a final order by |
| select bool_col, |
| sum(min(int_col)) |
| over(partition by min(tinyint_col) order by max(int_col) |
| rows between unbounded preceding and 1 following), |
| max(sum(bigint_col)) |
| over(partition by min(tinyint_col) order by max(int_col) |
| rows between unbounded preceding and 1 following), |
| min(sum(bigint_col)) |
| over(partition by min(tinyint_col) order by sum(int_col) |
| rows between unbounded preceding and 4 following) |
| from functional.alltypes |
| group by 1 |
| order by 1, 2, 3 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 06:SORT |
| | order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC |
| | |
| 05:ANALYTIC |
| | functions: sum(min(int_col)), max(sum(bigint_col)) |
| | partition by: min(tinyint_col) |
| | order by: max(int_col) ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 04:SORT |
| | order by: min(tinyint_col) ASC NULLS FIRST, max(int_col) ASC |
| | |
| 03:ANALYTIC |
| | functions: min(sum(bigint_col)) |
| | partition by: min(tinyint_col) |
| | order by: sum(int_col) ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING |
| | |
| 02:SORT |
| | order by: min(tinyint_col) ASC NULLS FIRST, sum(int_col) ASC |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: min(int_col), min(tinyint_col), max(int_col), sum(bigint_col), sum(int_col) |
| | group by: bool_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 10:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC |
| | |
| 06:SORT |
| | order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC |
| | |
| 05:ANALYTIC |
| | functions: sum(min(int_col)), max(sum(bigint_col)) |
| | partition by: min(tinyint_col) |
| | order by: max(int_col) ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 04:SORT |
| | order by: min(tinyint_col) ASC NULLS FIRST, max(int_col) ASC |
| | |
| 03:ANALYTIC |
| | functions: min(sum(bigint_col)) |
| | partition by: min(tinyint_col) |
| | order by: sum(int_col) ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING |
| | |
| 02:SORT |
| | order by: min(tinyint_col) ASC NULLS FIRST, sum(int_col) ASC |
| | |
| 09:EXCHANGE [HASH(min(tinyint_col))] |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: min:merge(int_col), min:merge(tinyint_col), max:merge(int_col), sum:merge(bigint_col), sum:merge(int_col) |
| | group by: bool_col |
| | |
| 07:EXCHANGE [HASH(bool_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: min(int_col), min(tinyint_col), max(int_col), sum(bigint_col), sum(int_col) |
| | group by: bool_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # grouping of multiple analytic exprs by compatible window/partition/order; |
| # the distributed plan repartitions only once on tinyint_col |
| select |
| # sort group 1 |
| count(double_col) |
| over(partition by tinyint_col, double_col order by int_col desc |
| rows between 1 preceding and 1 following), |
| last_value(double_col) |
| over(partition by double_col, tinyint_col order by int_col desc |
| rows between 1 preceding and 1 following), |
| # compatible default RANGE window |
| sum(tinyint_col) |
| over(partition by tinyint_col, double_col order by int_col desc), |
| # compatible RANGE window |
| # TODO: Revert window when RANGE offsets are supported |
| sum(smallint_col) |
| over(partition by double_col, tinyint_col order by int_col desc |
| range between unbounded preceding and unbounded following), |
| # range between 1 preceding and 1 following), |
| # different window |
| first_value(int_col) |
| over(partition by double_col, tinyint_col order by int_col desc |
| rows between 2 preceding and 2 following), |
| # sort group 2 |
| last_value(int_col ignore nulls) |
| over(partition by double_col, tinyint_col order by int_col asc |
| rows between 2 preceding and 2 following), |
| # different partition |
| first_value(int_col) |
| over(partition by double_col, tinyint_col order by int_col asc |
| rows between 2 preceding and 2 following), |
| # same partition group but different sort group |
| min(int_col) |
| over(partition by tinyint_col order by int_col desc |
| rows between unbounded preceding and 2 following) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 10:ANALYTIC |
| | functions: count(double_col), last_value(double_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 09:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: double_col, tinyint_col |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING |
| | |
| 08:ANALYTIC |
| | functions: sum(tinyint_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 07:ANALYTIC |
| | functions: first_value_rewrite(int_col, -1) |
| | partition by: double_col, tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING |
| | |
| 06:SORT |
| | order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC |
| | |
| 05:ANALYTIC |
| | functions: first_value_rewrite(int_col, -1) |
| | partition by: double_col, tinyint_col |
| | order by: int_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING |
| | |
| 04:ANALYTIC |
| | functions: last_value_ignore_nulls(int_col) |
| | partition by: double_col, tinyint_col |
| | order by: int_col ASC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 03:SORT |
| | order by: double_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: min(int_col) |
| | partition by: tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, int_col DESC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 12:EXCHANGE [UNPARTITIONED] |
| | |
| 10:ANALYTIC |
| | functions: count(double_col), last_value(double_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 09:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: double_col, tinyint_col |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING |
| | |
| 08:ANALYTIC |
| | functions: sum(tinyint_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 07:ANALYTIC |
| | functions: first_value_rewrite(int_col, -1) |
| | partition by: double_col, tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING |
| | |
| 06:SORT |
| | order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC |
| | |
| 05:ANALYTIC |
| | functions: first_value_rewrite(int_col, -1) |
| | partition by: double_col, tinyint_col |
| | order by: int_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING |
| | |
| 04:ANALYTIC |
| | functions: last_value_ignore_nulls(int_col) |
| | partition by: double_col, tinyint_col |
| | order by: int_col ASC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 03:SORT |
| | order by: double_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: min(int_col) |
| | partition by: tinyint_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, int_col DESC |
| | |
| 11:EXCHANGE [HASH(tinyint_col)] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # grouping of multiple analytic exprs by compatible window/partition/order |
| select |
| count(double_col) |
| over(partition by tinyint_col, double_col order by int_col desc |
| rows between 1 preceding and 1 following), |
| # unpartitioned default RANGE window |
| sum(tinyint_col) |
| over(order by int_col desc), |
| # partition compatible with first analytic expr but no order by |
| sum(smallint_col) |
| over(partition by double_col, tinyint_col), |
| # incompatible analytic expr |
| sum(smallint_col) |
| over(partition by bigint_col order by tinyint_col |
| rows between 2 preceding and 2 following) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:ANALYTIC |
| | functions: sum(tinyint_col) |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:SORT |
| | order by: int_col DESC |
| | |
| 05:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: double_col, tinyint_col |
| | |
| 04:ANALYTIC |
| | functions: count(double_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC |
| | |
| 02:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: bigint_col |
| | order by: tinyint_col ASC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST, tinyint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:ANALYTIC |
| | functions: sum(tinyint_col) |
| | order by: int_col DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 10:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: int_col DESC |
| | |
| 06:SORT |
| | order by: int_col DESC |
| | |
| 05:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: double_col, tinyint_col |
| | |
| 04:ANALYTIC |
| | functions: count(double_col) |
| | partition by: tinyint_col, double_col |
| | order by: int_col DESC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC |
| | |
| 09:EXCHANGE [HASH(tinyint_col,double_col)] |
| | |
| 02:ANALYTIC |
| | functions: sum(smallint_col) |
| | partition by: bigint_col |
| | order by: tinyint_col ASC |
| | window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST, tinyint_col ASC |
| | |
| 08:EXCHANGE [HASH(bigint_col)] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # basic test for analytics and inline views |
| select double_col, a, b, a + b, double_col + a from |
| (select |
| double_col, |
| count(int_col) over() a, |
| sum(int_col + bigint_col) over(partition by bool_col) b |
| from |
| (select * from functional.alltypes) v1) v2 |
| order by 2, 3, 4 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:SORT |
| | order by: a ASC, b ASC, a + b ASC |
| | |
| 03:ANALYTIC |
| | functions: count(int_col) |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col + bigint_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:SORT |
| | order by: a ASC, b ASC, a + b ASC |
| | |
| 03:ANALYTIC |
| | functions: count(int_col) |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col + bigint_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 05:EXCHANGE [HASH(functional.alltypes.bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # same as above but using a WITH-clause view |
| with v2 as |
| (select |
| double_col, |
| count(int_col) over() a, |
| sum(int_col + bigint_col) over(partition by bool_col) b |
| from |
| (select * from functional.alltypes) v1) |
| select double_col, a, b, a + b, double_col + a from v2 |
| order by 2, 3, 4 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:SORT |
| | order by: a ASC, b ASC, a + b ASC |
| | |
| 03:ANALYTIC |
| | functions: count(int_col) |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col + bigint_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:SORT |
| | order by: a ASC, b ASC, a + b ASC |
| | |
| 03:ANALYTIC |
| | functions: count(int_col) |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col + bigint_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 05:EXCHANGE [HASH(functional.alltypes.bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # test ignoring of non-materialized analytic exprs |
| select b from |
| (select |
| count(int_col) over(order by id) a, |
| sum(int_col) over(partition by bigint_col) b, |
| max(tinyint_col) over() c, |
| min(double_col) over(partition by bool_col order by string_col) d, |
| count(1) over(partition by bool_col order by string_col |
| rows between unbounded preceding and 1 following) e |
| from functional.alltypes) v |
| where e < 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:SELECT |
| | predicates: count(1) < 10 |
| | |
| 04:ANALYTIC |
| | functions: count(1) |
| | partition by: bool_col |
| | order by: string_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: bool_col ASC NULLS FIRST, string_col ASC |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:EXCHANGE [UNPARTITIONED] |
| | |
| 05:SELECT |
| | predicates: count(1) < 10 |
| | |
| 04:ANALYTIC |
| | functions: count(1) |
| | partition by: bool_col |
| | order by: string_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: bool_col ASC NULLS FIRST, string_col ASC |
| | |
| 07:EXCHANGE [HASH(bool_col)] |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # basic test for analytics and unions |
| select min(id) over (partition by int_col) |
| from functional.alltypes |
| union distinct |
| select max(id) over (partition by bool_col) |
| from functional.alltypessmall |
| union all |
| (select sum(bigint_col) over (partition by int_col order by id) |
| from functional.alltypestiny) |
| order by 1 desc nulls first |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 12:SORT |
| | order by: min(id) OVER(...) DESC NULLS FIRST |
| | |
| 08:UNION |
| | pass-through-operands: 07 |
| | |
| |--11:ANALYTIC |
| | | functions: sum(bigint_col) |
| | | partition by: int_col |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 10:SORT |
| | | order by: int_col ASC NULLS FIRST, id ASC |
| | | |
| | 09:SCAN HDFS [functional.alltypestiny] |
| | partitions=4/4 files=4 size=460B |
| | |
| 07:AGGREGATE [FINALIZE] |
| | group by: min(id) OVER(...) |
| | |
| 00:UNION |
| | |
| |--06:ANALYTIC |
| | | functions: max(id) |
| | | partition by: bool_col |
| | | |
| | 05:SORT |
| | | order by: bool_col ASC NULLS FIRST |
| | | |
| | 04:SCAN HDFS [functional.alltypessmall] |
| | partitions=4/4 files=4 size=6.32KB |
| | |
| 03:ANALYTIC |
| | functions: min(id) |
| | partition by: int_col |
| | |
| 02:SORT |
| | order by: int_col ASC NULLS FIRST |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 18:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: min(id) OVER(...) DESC NULLS FIRST |
| | |
| 12:SORT |
| | order by: min(id) OVER(...) DESC NULLS FIRST |
| | |
| 08:UNION |
| | pass-through-operands: 16 |
| | |
| |--11:ANALYTIC |
| | | functions: sum(bigint_col) |
| | | partition by: int_col |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 10:SORT |
| | | order by: int_col ASC NULLS FIRST, id ASC |
| | | |
| | 17:EXCHANGE [HASH(int_col)] |
| | | |
| | 09:SCAN HDFS [functional.alltypestiny] |
| | partitions=4/4 files=4 size=460B |
| | |
| 16:AGGREGATE [FINALIZE] |
| | group by: min(id) OVER(...) |
| | |
| 15:EXCHANGE [HASH(min(id) OVER(...))] |
| | |
| 07:AGGREGATE [STREAMING] |
| | group by: min(id) OVER(...) |
| | |
| 00:UNION |
| | |
| |--06:ANALYTIC |
| | | functions: max(id) |
| | | partition by: bool_col |
| | | |
| | 05:SORT |
| | | order by: bool_col ASC NULLS FIRST |
| | | |
| | 14:EXCHANGE [HASH(bool_col)] |
| | | |
| | 04:SCAN HDFS [functional.alltypessmall] |
| | partitions=4/4 files=4 size=6.32KB |
| | |
| 03:ANALYTIC |
| | functions: min(id) |
| | partition by: int_col |
| | |
| 02:SORT |
| | order by: int_col ASC NULLS FIRST |
| | |
| 13:EXCHANGE [HASH(int_col)] |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # analytics in an uncorrelated subquery |
| select id, int_col, bool_col from functional.alltypessmall t1 |
| where int_col in |
| (select min(bigint_col) over(partition by bool_col) |
| from functional.alltypestiny t2 where t2.id < 10) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:HASH JOIN [LEFT SEMI JOIN] |
| | hash predicates: int_col = min(bigint_col) |
| | runtime filters: RF000 <- min(bigint_col) |
| | |
| |--03:ANALYTIC |
| | | functions: min(bigint_col) |
| | | partition by: bool_col |
| | | |
| | 02:SORT |
| | | order by: bool_col ASC NULLS FIRST |
| | | |
| | 01:SCAN HDFS [functional.alltypestiny t2] |
| | partitions=4/4 files=4 size=460B |
| | predicates: t2.id < 10 |
| | |
| 00:SCAN HDFS [functional.alltypessmall t1] |
| partitions=4/4 files=4 size=6.32KB |
| runtime filters: RF000 -> int_col |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 04:HASH JOIN [LEFT SEMI JOIN, BROADCAST] |
| | hash predicates: int_col = min(bigint_col) |
| | runtime filters: RF000 <- min(bigint_col) |
| | |
| |--06:EXCHANGE [BROADCAST] |
| | | |
| | 03:ANALYTIC |
| | | functions: min(bigint_col) |
| | | partition by: bool_col |
| | | |
| | 02:SORT |
| | | order by: bool_col ASC NULLS FIRST |
| | | |
| | 05:EXCHANGE [HASH(bool_col)] |
| | | |
| | 01:SCAN HDFS [functional.alltypestiny t2] |
| | partitions=4/4 files=4 size=460B |
| | predicates: t2.id < 10 |
| | |
| 00:SCAN HDFS [functional.alltypessmall t1] |
| partitions=4/4 files=4 size=6.32KB |
| runtime filters: RF000 -> int_col |
| ==== |
| # test conjunct assignment and slot materialization due to conjuncts |
| # (see IMPALA-1243) |
| select 1 from |
| (select bigint_col, |
| min(int_col) over() a, |
| max(int_col) over(partition by bool_col) b, |
| count(int_col) over(partition by bool_col) c, |
| sum(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 1 following) d, |
| avg(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 2 following) e |
| from functional.alltypes |
| # assigned in scan |
| where int_col between 5 and 10) v |
| where |
| # assigned to separate Select node |
| v.bigint_col > 10 and |
| v.a < 1 and |
| v.a < v.bigint_col + 1 and |
| v.b < 2 and |
| v.b < v.bigint_col + 2 and |
| v.c < 3 and |
| v.c < v.bigint_col + 3 and |
| v.d < 4 and |
| v.d < v.bigint_col + 4 and |
| v.e < 5 and |
| v.e < v.bigint_col + 5 and |
| v.a != v.c and |
| v.a != v.e and |
| v.b != v.c |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:SELECT |
| | predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5 |
| | |
| 06:ANALYTIC |
| | functions: min(int_col) |
| | |
| 05:ANALYTIC |
| | functions: avg(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING |
| | |
| 04:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: bigint_col ASC NULLS FIRST, id ASC |
| | |
| 02:ANALYTIC |
| | functions: max(int_col), count(int_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| predicates: int_col <= 10, int_col >= 5 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:SELECT |
| | predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5 |
| | |
| 06:ANALYTIC |
| | functions: min(int_col) |
| | |
| 10:EXCHANGE [UNPARTITIONED] |
| | |
| 05:ANALYTIC |
| | functions: avg(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING |
| | |
| 04:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 03:SORT |
| | order by: bigint_col ASC NULLS FIRST, id ASC |
| | |
| 09:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:ANALYTIC |
| | functions: max(int_col), count(int_col) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 08:EXCHANGE [HASH(bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| predicates: int_col <= 10, int_col >= 5 |
| ==== |
| # test predicate propagation onto and through analytic nodes |
| # TODO: allow AnalyticEvalNode to apply a < 20 |
| select 1 from |
| (select id, int_col, bigint_col, |
| sum(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 1 following) a, |
| avg(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 2 following) b |
| from functional.alltypes) t1 |
| inner join functional.alltypes t2 |
| on (t1.id = t2.id and t1.a = t2.int_col) |
| where t2.id < 10 and t2.int_col < 20 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:HASH JOIN [INNER JOIN] |
| | hash predicates: id = t2.id, sum(int_col) = t2.int_col |
| | |
| |--03:SCAN HDFS [functional.alltypes t2] |
| | partitions=24/24 files=24 size=478.45KB |
| | predicates: t2.id < 10, t2.int_col < 20 |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST, id ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # test that predicates are correctly propagated in the presence of outer joins |
| # (i.e., no predicates should be propagated in this query) |
| select 1 from |
| (select id, int_col, bigint_col, |
| sum(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 1 following) a, |
| avg(int_col) over(partition by bigint_col order by id |
| rows between unbounded preceding and 2 following) b |
| from functional.alltypes) t1 |
| left outer join functional.alltypes t2 |
| on (t1.id = t2.id and t1.a = t2.int_col) |
| where t2.id < 10 and t2.int_col < 20 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:HASH JOIN [LEFT OUTER JOIN] |
| | hash predicates: id = t2.id, sum(int_col) = t2.int_col |
| | other predicates: t2.id < 10, t2.int_col < 20 |
| | |
| |--03:SCAN HDFS [functional.alltypes t2] |
| | partitions=24/24 files=24 size=478.45KB |
| | predicates: t2.id < 10, t2.int_col < 20 |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: bigint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 01:SORT |
| | order by: bigint_col ASC NULLS FIRST, id ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # test canonical function/window/order: row_number() gets a ROWS window |
| select |
| row_number() over(partition by tinyint_col order by id) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: row_number() |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # test canonical function/window/order: lead() and lag() have default |
| # arguments explicitly set |
| select |
| lead(int_col) over(partition by tinyint_col order by id), |
| lag(int_col) over(partition by tinyint_col order by id), |
| lead(int_col, 4) over(partition by smallint_col order by id), |
| lag(int_col, 4) over(partition by smallint_col order by id), |
| lead(int_col, 8, 20) over(partition by int_col order by id), |
| lag(int_col, 8, 20) over(partition by int_col order by id) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 09:ANALYTIC |
| | functions: lag(int_col, 8, 20) |
| | partition by: int_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 8 PRECEDING |
| | |
| 08:ANALYTIC |
| | functions: lead(int_col, 8, 20) |
| | partition by: int_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 8 FOLLOWING |
| | |
| 07:SORT |
| | order by: int_col ASC NULLS FIRST, id ASC |
| | |
| 06:ANALYTIC |
| | functions: lag(int_col, 4, NULL) |
| | partition by: smallint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 PRECEDING |
| | |
| 05:ANALYTIC |
| | functions: lead(int_col, 4, NULL) |
| | partition by: smallint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING |
| | |
| 04:SORT |
| | order by: smallint_col ASC NULLS FIRST, id ASC |
| | |
| 03:ANALYTIC |
| | functions: lag(int_col, 1, NULL) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING |
| | |
| 02:ANALYTIC |
| | functions: lead(int_col, 1, NULL) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING |
| # and not starting with UNBOUNDED PRECEDING. |
| select |
| # windows and sort order should be reversed |
| sum(int_col) over(partition by tinyint_col order by id desc nulls first, bool_col asc nulls last |
| rows between 2 preceding and unbounded following), |
| # TODO: Revert window when RANGE offsets are supported |
| sum(int_col) over(partition by tinyint_col order by id asc nulls last |
| range between current row and unbounded following), |
| # range between 4 preceding and unbounded following), |
| # windows and sort order should remain unchanged |
| count(bigint_col) over(partition by tinyint_col order by id), |
| count(bigint_col) over(partition by tinyint_col order by id, int_col |
| rows between 6 preceding and 8 following), |
| # TODO: Revert window when RANGE offsets are supported |
| count(bigint_col) over(partition by tinyint_col order by id |
| range between unbounded preceding and unbounded following) |
| # range between unbounded preceding and 10 following) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 08:ANALYTIC |
| | functions: count(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING |
| | |
| 07:ANALYTIC |
| | functions: count(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: tinyint_col |
| | order by: id ASC, bool_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING |
| | |
| 05:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST |
| | |
| 04:ANALYTIC |
| | functions: count(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC, int_col ASC |
| | window: ROWS BETWEEN 6 PRECEDING AND 8 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: tinyint_col |
| | order by: id DESC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING |
| # and either not starting with UNBOUNDED PRECEDING or first_value(... IGNORE NULLS), and |
| # change first_value() to last_value(). |
| # Set the end boundary to CURRENT_ROW for first_value() if the start boundary is |
| # UNBOUNDED_PRECEDING and not IGNORE NULLS. |
| select |
| # windows, sort order and function should be reversed |
| last_value(int_col) over(partition by tinyint_col order by id desc nulls first, bool_col asc nulls last |
| rows between 2 preceding and unbounded following), |
| # TODO: Revert window when RANGE offsets are supported |
| last_value(int_col) over(partition by tinyint_col order by id asc nulls last |
| range between current row and unbounded following), |
| # windows, sort order and function should remain unchanged |
| first_value(bigint_col ignore nulls) over(partition by tinyint_col order by id), |
| first_value(bigint_col) over(partition by tinyint_col order by id, int_col |
| rows between 6 preceding and 8 following), |
| # TODO: Revert window when RANGE offsets are supported |
| first_value(bigint_col) over(partition by tinyint_col order by id |
| range between unbounded preceding and unbounded following), |
| # window, order, and function should be reversed |
| first_value(tinyint_col ignore nulls) over (order by id |
| rows between 1 following and 2 following) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 09:ANALYTIC |
| | functions: last_value_ignore_nulls(tinyint_col) |
| | order by: id DESC |
| | window: ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING |
| | |
| 08:SORT |
| | order by: id DESC NULLS FIRST |
| | |
| 07:ANALYTIC |
| | functions: first_value_ignore_nulls(bigint_col), first_value(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:ANALYTIC |
| | functions: first_value(int_col) |
| | partition by: tinyint_col |
| | order by: id ASC, bool_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST |
| | |
| 04:ANALYTIC |
| | functions: first_value_rewrite(bigint_col, -1) |
| | partition by: tinyint_col |
| | order by: id ASC, int_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 6 PRECEDING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: first_value(int_col) |
| | partition by: tinyint_col |
| | order by: id DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING |
| # and not starting with UNBOUNDED PRECEDING, and change last_value() to first_value() |
| select |
| # windows, sort order and function should be reversed |
| last_value(int_col) over(partition by tinyint_col order by id desc nulls first, |
| bool_col asc nulls last |
| rows between 2 preceding and unbounded following), |
| # TODO: Revert window when RANGE offsets are supported |
| last_value(int_col) over(partition by tinyint_col order by id asc nulls last |
| range between current row and unbounded following), |
| # windows, sort order and function should remain unchanged |
| last_value(bigint_col) over(partition by tinyint_col order by id), |
| last_value(bigint_col) over(partition by tinyint_col order by id, int_col |
| rows between 6 preceding and 8 following), |
| # TODO: Revert window when RANGE offsets are supported |
| last_value(bigint_col) over(partition by tinyint_col order by id |
| range between unbounded preceding and unbounded following) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 08:ANALYTIC |
| | functions: last_value(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING |
| | |
| 07:ANALYTIC |
| | functions: last_value(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 06:ANALYTIC |
| | functions: first_value(int_col) |
| | partition by: tinyint_col |
| | order by: id ASC, bool_col DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST |
| | |
| 04:ANALYTIC |
| | functions: last_value(bigint_col) |
| | partition by: tinyint_col |
| | order by: id ASC, int_col ASC |
| | window: ROWS BETWEEN 6 PRECEDING AND 8 FOLLOWING |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC |
| | |
| 02:ANALYTIC |
| | functions: first_value(int_col) |
| | partition by: tinyint_col |
| | order by: id DESC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # IMPALA-1229 |
| select DENSE_RANK() OVER (ORDER BY t1.day ASC) |
| FROM functional.alltypesagg t1 |
| WHERE EXISTS (SELECT t1.year AS int_col_1 FROM functional.alltypesagg t1) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:ANALYTIC |
| | functions: dense_rank() |
| | order by: day ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: day ASC |
| | |
| 02:NESTED LOOP JOIN [LEFT SEMI JOIN] |
| | |
| |--01:SCAN HDFS [functional.alltypesagg t1] |
| | partitions=11/11 files=11 size=814.73KB |
| | limit: 1 |
| | |
| 00:SCAN HDFS [functional.alltypesagg t1] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # IMPALA-1243: the Where clause predicate needs to be evaluated in a Select node |
| # not as a scan predicate of alltypes |
| select COUNT(*) |
| FROM ( |
| SELECT id, tinyint_col |
| FROM functional.alltypestiny t1 |
| UNION ALL |
| SELECT DENSE_RANK() OVER (ORDER BY t1.id), tinyint_col |
| FROM functional.alltypes t1 ) t1 |
| WHERE id IS NULL and tinyint_col != 5 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | |
| 05:SELECT |
| | predicates: id IS NULL, tinyint_col != 5 |
| | |
| 00:UNION |
| | |
| |--04:ANALYTIC |
| | | functions: dense_rank() |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 03:SORT |
| | | order by: id ASC |
| | | |
| | 02:SCAN HDFS [functional.alltypes t1] |
| | partitions=24/24 files=24 size=478.45KB |
| | |
| 01:SCAN HDFS [functional.alltypestiny t1] |
| partitions=4/4 files=4 size=460B |
| predicates: t1.id IS NULL, t1.tinyint_col != 5 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 10:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE |
| | output: count(*) |
| | |
| 05:SELECT |
| | predicates: id IS NULL, tinyint_col != 5 |
| | |
| 00:UNION |
| | |
| |--08:EXCHANGE [RANDOM] |
| | | |
| | 04:ANALYTIC |
| | | functions: dense_rank() |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 07:MERGING-EXCHANGE [UNPARTITIONED] |
| | | order by: id ASC |
| | | |
| | 03:SORT |
| | | order by: id ASC |
| | | |
| | 02:SCAN HDFS [functional.alltypes t1] |
| | partitions=24/24 files=24 size=478.45KB |
| | |
| 01:SCAN HDFS [functional.alltypestiny t1] |
| partitions=4/4 files=4 size=460B |
| predicates: t1.id IS NULL, t1.tinyint_col != 5 |
| ==== |
| # Propagate a predicate on a partition key through an inline view that has an analytic |
| # function. Predicates that are not compatible with analytic function's partition by |
| # clause are not propagated (IMPALA-1900). |
| select * from |
| (select id, int_col, year, |
| sum(int_col) over(partition by year order by id) as s |
| from functional.alltypes) v |
| where year = 2009 and id = 1 and int_col < 10 and s = 4 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SELECT |
| | predicates: id = 1, int_col < 10, sum(int_col) = 4 |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: year |
| | order by: id ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: year ASC NULLS FIRST, id ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=12/24 files=12 size=238.68KB |
| ==== |
| # Propagate predicates through an inline view that computes multiple analytic functions |
| # (IMPALA-1900) |
| select * from |
| (select year, tinyint_col, |
| last_value(int_col) over(partition by int_col, year order by id |
| rows between 1 preceding and 1 following), |
| last_value(tinyint_col) over(partition by id, year order by int_col |
| range between unbounded preceding and unbounded following), |
| sum(int_col) over(partition by year, tinyint_col), |
| avg(int_col) over(partition by tinyint_col, id, year order by bigint_col) |
| from functional.alltypes) v |
| where year = 2009 and tinyint_col + 1 = 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 09:SELECT |
| | predicates: tinyint_col + 1 = 1 |
| | |
| 08:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: year, tinyint_col |
| | |
| 07:SORT |
| | order by: year ASC NULLS FIRST, tinyint_col ASC NULLS FIRST |
| | |
| 06:ANALYTIC |
| | functions: last_value(int_col) |
| | partition by: int_col, year |
| | order by: id ASC |
| | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |
| | |
| 05:SORT |
| | order by: int_col ASC NULLS FIRST, year ASC NULLS FIRST, id ASC |
| | |
| 04:ANALYTIC |
| | functions: avg(int_col) |
| | partition by: tinyint_col, id, year |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: tinyint_col ASC NULLS FIRST, id ASC NULLS FIRST, year ASC NULLS FIRST, bigint_col ASC |
| | |
| 02:ANALYTIC |
| | functions: last_value(tinyint_col) |
| | partition by: id, year |
| | order by: int_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING |
| | |
| 01:SORT |
| | order by: id ASC NULLS FIRST, year ASC NULLS FIRST, int_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=12/24 files=12 size=238.68KB |
| ==== |
| # Don't propagate predicates through an inline view with multiple analytic functions |
| # if the predicates are not compatible with every analytic function's partition by |
| # clause (IMPALA-1900) |
| select * from |
| (select year, tinyint_col, |
| sum(int_col) over(partition by year, tinyint_col order by bigint_col), |
| avg(int_col) over(partition by year order by bigint_col), |
| lead(int_col) over(order by tinyint_col) |
| from functional.alltypes) v |
| where year = 2009 and tinyint_col = 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:SELECT |
| | predicates: tinyint_col = 1, year = 2009 |
| | |
| 06:ANALYTIC |
| | functions: lead(int_col, 1, NULL) |
| | order by: tinyint_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING |
| | |
| 05:SORT |
| | order by: tinyint_col ASC |
| | |
| 04:ANALYTIC |
| | functions: avg(int_col) |
| | partition by: year |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: year ASC NULLS FIRST, bigint_col ASC |
| | |
| 02:ANALYTIC |
| | functions: sum(int_col) |
| | partition by: year, tinyint_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: year ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # Propagate a predicate generated from equivalence classes |
| # through an inline with an analytic function (IMPALA-1900) |
| select * from |
| (select month, int_col, tinyint_col, |
| sum(id) over(partition by month, tinyint_col order by bigint_col) |
| from functional.alltypestiny where id = tinyint_col) v |
| where month = int_col and int_col = 1 and tinyint_col = 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SELECT |
| | predicates: month = int_col |
| | |
| 02:ANALYTIC |
| | functions: sum(id) |
| | partition by: month, tinyint_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: month ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| partitions=1/4 files=1 size=115B |
| predicates: functional.alltypestiny.id = 1, functional.alltypestiny.tinyint_col = 1 |
| ==== |
| # Don't propagate predicates through an inline view with an analytic |
| # function that has a complex (non SlotRef) partition by clause for consistency with |
| # the group by behavior (IMPALA-1900). |
| # TODO: Enable predicate propagation through inline views with complex partition by |
| # exprs. |
| select * from |
| (select t1.tinyint_col as x, t2.int_col as y, |
| sum(t1.id) over(partition by t1.tinyint_col + 1, t2.int_col - 1 order by t1.bigint_col) |
| from functional.alltypes t1 inner join functional.alltypesagg t2 |
| on t1.id = t2.id) v |
| where v.x + v.y < 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:SELECT |
| | predicates: tinyint_col + int_col < 10 |
| | |
| 04:ANALYTIC |
| | functions: sum(id) |
| | partition by: t1.tinyint_col + 1, t2.int_col - 1 |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 03:SORT |
| | order by: tinyint_col + 1 ASC NULLS FIRST, int_col - 1 ASC NULLS FIRST, bigint_col ASC |
| | |
| 02:HASH JOIN [INNER JOIN] |
| | hash predicates: t1.id = t2.id |
| | runtime filters: RF000 <- t2.id |
| | |
| |--01:SCAN HDFS [functional.alltypesagg t2] |
| | partitions=11/11 files=11 size=814.73KB |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| partitions=24/24 files=24 size=478.45KB |
| runtime filters: RF000 -> t1.id |
| ==== |
| # Don't propagate a predicate through an inline view with an analytic function |
| # when the select list items contain a complex (non SlotRef) expr on a partition by |
| # expr (IMPALA-1900) |
| # TODO: Enable migration of predicates into inline views with analytics if the predicate |
| # is compatible with the analytics' partition by clause. |
| select * from |
| (select int_col + 1 as x, |
| sum(id) over(partition by int_col order by bigint_col) |
| from functional.alltypestiny) v |
| where x = 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SELECT |
| | predicates: int_col + 1 = 1 |
| | |
| 02:ANALYTIC |
| | functions: sum(id) |
| | partition by: int_col |
| | order by: bigint_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC NULLS FIRST, bigint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| partitions=4/4 files=4 size=460B |
| ==== |
| # IMPALA-1519: Check that the first analytic sort of a select block |
| # materializes TupleIsNullPredicates to be substituted in ancestor nodes. |
| select |
| sum(t1.id) over (partition by t1.bool_col), |
| count(1) over (order by t1.int_col), |
| avg(g) over (order by f), |
| t2.a, |
| t2.d |
| from functional.alltypestiny t1 |
| left outer join |
| (select |
| id as a, |
| coalesce(id, 10) as b, |
| int_col as c, |
| coalesce(int_col, 20) as d, |
| bigint_col e, |
| coalesce(bigint_col, 30) as f, |
| coalesce(id + bigint_col, 40) as g |
| from functional.alltypestiny) t2 |
| on (t1.id = t2.a + 100) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 08:ANALYTIC |
| | functions: avg(if(TupleIsNull(), NULL, coalesce(id + bigint_col, 40))) |
| | order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 07:SORT |
| | order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC |
| | |
| 06:ANALYTIC |
| | functions: count(1) |
| | order by: int_col ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: int_col ASC |
| | |
| 04:ANALYTIC |
| | functions: sum(id) |
| | partition by: t1.bool_col |
| | |
| 03:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 02:HASH JOIN [RIGHT OUTER JOIN] |
| | hash predicates: id + 100 = t1.id |
| | runtime filters: RF000 <- t1.id |
| | |
| |--00:SCAN HDFS [functional.alltypestiny t1] |
| | partitions=4/4 files=4 size=460B |
| | |
| 01:SCAN HDFS [functional.alltypestiny] |
| partitions=4/4 files=4 size=460B |
| runtime filters: RF000 -> id + 100 |
| ==== |
| # IMPALA-1519: Check that the first analytic sort of a select block |
| # materializes TupleIsNullPredicates to be substituted in ancestor nodes. |
| # Same test as the one above, but with an aggregation on top. |
| select avg(af1), sum(af3), count(a) |
| from |
| (select |
| sum(t1.id) over (partition by t1.bool_col) af1, |
| count(1) over (order by t1.int_col) af2, |
| avg(g) over (order by f) af3, |
| t2.a, |
| t2.d |
| from functional.alltypestiny t1 |
| left outer join |
| (select |
| id as a, |
| coalesce(id, 10) as b, |
| int_col as c, |
| coalesce(int_col, 20) as d, |
| bigint_col e, |
| coalesce(bigint_col, 30) as f, |
| coalesce(id + bigint_col, 40) as g |
| from functional.alltypestiny) t2 |
| on (t1.id = t2.a + 100)) t3 |
| group by d |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: avg(sum(t1.id)), sum(avg(g)), count(id) |
| | group by: if(TupleIsNull(), NULL, coalesce(int_col, 20)) |
| | |
| 06:ANALYTIC |
| | functions: avg(if(TupleIsNull(), NULL, coalesce(id + bigint_col, 40))) |
| | order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC |
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 05:SORT |
| | order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC |
| | |
| 04:ANALYTIC |
| | functions: sum(id) |
| | partition by: t1.bool_col |
| | |
| 03:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 02:HASH JOIN [RIGHT OUTER JOIN] |
| | hash predicates: id + 100 = t1.id |
| | runtime filters: RF000 <- t1.id |
| | |
| |--00:SCAN HDFS [functional.alltypestiny t1] |
| | partitions=4/4 files=4 size=460B |
| | |
| 01:SCAN HDFS [functional.alltypestiny] |
| partitions=4/4 files=4 size=460B |
| runtime filters: RF000 -> id + 100 |
| ==== |
| # IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate |
| # is performed correctly with analytics and multiple nesting levels. |
| # The test covers these cases: |
| # - analytic output that needs to be wrapped |
| # - analytic input that needs to be wrapped |
| # - both cases above with and without order by in the over() clause |
| # - exprs that need to be wrapped multiple times due to multiple outer joins |
| select * from |
| (select |
| a.id, |
| sum(x) over (partition by a.id) as x, |
| ifnull(y, 10) as y, |
| ifnull(z, "b") as z |
| from functional.alltypestiny a |
| left outer join |
| (select id, |
| ifnull(int_col, 1) x, |
| count(bigint_col) over(partition by id) y, |
| ifnull(string_col, "a") z |
| from functional.alltypestiny b) v1 |
| on (a.id = v1.id)) v2 |
| full outer join |
| (select |
| c.id, |
| sum(x) over (order by c.id) as x, |
| ifnull(y, 10) as y, |
| ifnull(z, "b") as z |
| from functional.alltypestiny c |
| left outer join |
| (select id, |
| ifnull(int_col, 1) x, |
| count(bigint_col) over(order by id) y, |
| ifnull(string_col, "a") z |
| from functional.alltypestiny d) v3 |
| on (c.id = v3.id)) v4 |
| on (v2.id = v4.id) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 14:HASH JOIN [FULL OUTER JOIN] |
| | hash predicates: id = id |
| | |
| |--13:ANALYTIC |
| | | functions: sum(if(TupleIsNull(), NULL, ifnull(int_col, 1))) |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 12:SORT |
| | | order by: id ASC |
| | | |
| | 11:HASH JOIN [RIGHT OUTER JOIN] |
| | | hash predicates: id = c.id |
| | | |
| | |--07:SCAN HDFS [functional.alltypestiny c] |
| | | partitions=4/4 files=4 size=460B |
| | | |
| | 10:ANALYTIC |
| | | functions: count(bigint_col) |
| | | order by: id ASC |
| | | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | | |
| | 09:SORT |
| | | order by: id ASC |
| | | |
| | 08:SCAN HDFS [functional.alltypestiny d] |
| | partitions=4/4 files=4 size=460B |
| | |
| 06:ANALYTIC |
| | functions: sum(if(TupleIsNull(), NULL, ifnull(int_col, 1))) |
| | partition by: a.id |
| | |
| 05:SORT |
| | order by: id ASC NULLS FIRST |
| | |
| 04:HASH JOIN [RIGHT OUTER JOIN] |
| | hash predicates: id = a.id |
| | |
| |--00:SCAN HDFS [functional.alltypestiny a] |
| | partitions=4/4 files=4 size=460B |
| | |
| 03:ANALYTIC |
| | functions: count(bigint_col) |
| | partition by: id |
| | |
| 02:SORT |
| | order by: id ASC NULLS FIRST |
| | |
| 01:SCAN HDFS [functional.alltypestiny b] |
| partitions=4/4 files=4 size=460B |
| ==== |
| # IMPALA-1946: Check that On-clause predicates of an outer join assigned in a scan |
| # are not wrapped in TupleIsNullPredicates. |
| select /* +straight_join */ a.id, b.id |
| from functional.alltypestiny a |
| left outer join |
| (select t1.id, ifnull(t1.int_col, 10) as int_col |
| from functional.alltypestiny t1 |
| inner join functional.alltypestiny t2 |
| on (t1.id = t2.id)) b |
| on (a.id = b.id and b.int_col < 10) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:HASH JOIN [LEFT OUTER JOIN] |
| | hash predicates: a.id = t1.id |
| | |
| |--03:HASH JOIN [INNER JOIN] |
| | | hash predicates: t2.id = t1.id |
| | | runtime filters: RF000 <- t1.id |
| | | |
| | |--01:SCAN HDFS [functional.alltypestiny t1] |
| | | partitions=4/4 files=4 size=460B |
| | | predicates: ifnull(t1.int_col, 10) < 10 |
| | | |
| | 02:SCAN HDFS [functional.alltypestiny t2] |
| | partitions=4/4 files=4 size=460B |
| | runtime filters: RF000 -> t2.id |
| | |
| 00:SCAN HDFS [functional.alltypestiny a] |
| partitions=4/4 files=4 size=460B |
| ==== |
| # IMPALA-2832: Test proper cloning of analytic function call exprs in a CTAS. |
| create table impala_2832 as select |
| first_value(int_col) over (order by int_col rows between current row and current row), |
| first_value(bigint_col) over (order by int_col rows between current row and current row) |
| from functional.alltypes |
| ---- PLAN |
| WRITE TO HDFS [default.impala_2832, OVERWRITE=false] |
| | partitions=1 |
| | |
| 02:ANALYTIC |
| | functions: last_value(int_col), last_value(bigint_col) |
| | order by: int_col ASC |
| | window: ROWS BETWEEN CURRENT ROW AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: int_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # For first/last_value(), ranges windows get rewritten as rows windows, |
| # so these should be grouped. |
| select last_value(int_col) over (order by bigint_col |
| range between unbounded preceding and current row), |
| first_value(int_col) over (order by bigint_col |
| rows between unbounded preceding and current row) |
| from functional.alltypes |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:ANALYTIC |
| | functions: last_value(int_col), first_value(int_col) |
| | order by: bigint_col ASC |
| | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
| | |
| 01:SORT |
| | order by: bigint_col ASC |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # IMPALA-4263: Analytic function needs a hash exchange because the partition exprs |
| # reference a tuple that is made nullable in the join fragment. |
| select /* +straight_join */ count(*) over (partition by t1.id) |
| from functional.alltypes t1 |
| right outer join /* +shuffle */ functional.alltypessmall t2 |
| on t1.id = t2.id |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:EXCHANGE [UNPARTITIONED] |
| | |
| 04:ANALYTIC |
| | functions: count(*) |
| | partition by: t1.id |
| | |
| 03:SORT |
| | order by: id ASC NULLS FIRST |
| | |
| 07:EXCHANGE [HASH(t1.id)] |
| | |
| 02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED] |
| | hash predicates: t1.id = t2.id |
| | runtime filters: RF000 <- t2.id |
| | |
| |--06:EXCHANGE [HASH(t2.id)] |
| | | |
| | 01:SCAN HDFS [functional.alltypessmall t2] |
| | partitions=4/4 files=4 size=6.32KB |
| | |
| 05:EXCHANGE [HASH(t1.id)] |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| partitions=24/24 files=24 size=478.45KB |
| runtime filters: RF000 -> t1.id |
| ==== |
| # IMPALA-1882: Confirm that first_value function used without a partition by and order |
| # by clause does not need a sort node |
| select first_value(tinyint_col ignore nulls) over () from functional.alltypesagg |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 01:ANALYTIC |
| | functions: first_value_ignore_nulls(tinyint_col) |
| | |
| 02:EXCHANGE [UNPARTITIONED] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # IMPALA-1882: Confirm that last_value function used without a partition by and order |
| # by clause does not need a sort node |
| select last_value(tinyint_col ignore nulls) over () from functional.alltypesagg |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 01:ANALYTIC |
| | functions: last_value_ignore_nulls(tinyint_col) |
| | |
| 02:EXCHANGE [UNPARTITIONED] |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # IMPALA-1882: Confirm that first_value function using only a partition by clause |
| # sorts over partition column |
| select *, first_value(id) over (partition by bool_col) first_val from |
| functional.alltypessmall; |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 02:ANALYTIC |
| | functions: first_value(id) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 03:EXCHANGE [HASH(bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| partitions=4/4 files=4 size=6.32KB |
| ==== |
| # IMPALA-1882: Confirm that last_value function using only a partition by clause |
| # sorts over partition column |
| select *, last_value(id) over (partition by bool_col) first_val from |
| functional.alltypessmall; |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 02:ANALYTIC |
| | functions: last_value(id) |
| | partition by: bool_col |
| | |
| 01:SORT |
| | order by: bool_col ASC NULLS FIRST |
| | |
| 03:EXCHANGE [HASH(bool_col)] |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| partitions=4/4 files=4 size=6.32KB |
| ==== |