blob: 2977f5d07e0f0bd77a251b74ba1f7bbc4eade48b [file] [log] [blame]
# the hash partition for aggregation is adjusted if subsequent analytic computation
# can take advantage of it
select
max(tinyint_col) over(partition by int_col)
from functional.alltypes
group by int_col, tinyint_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
03:ANALYTIC
| functions: max(tinyint_col)
| partition by: int_col
|
02:SORT
| order by: int_col ASC NULLS FIRST
|
05:AGGREGATE [FINALIZE]
| group by: int_col, tinyint_col
|
04:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col, tinyint_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# partition groups are coalesced if the intersection of their partition exprs
# has a high enough cardinality to allow distribution across all nodes
select
max(int_col) over(partition by int_col, bool_col),
max(int_col) over(partition by int_col, tinyint_col)
from functional.alltypes
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
04:ANALYTIC
| functions: max(int_col)
| partition by: int_col, tinyint_col
|
03:SORT
| order by: int_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col, bool_col
|
01:SORT
| order by: int_col ASC NULLS FIRST, bool_col ASC NULLS FIRST
|
05:EXCHANGE [HASH(int_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# unpartitioned analytics are executed with distributed sorts
# TODO: avoid resorting on the same exprs
select
max(int_col) over(partition by int_col),
min(int_col) over(order by int_col)
from functional.alltypes
---- PLAN
PLAN-ROOT SINK
|
04:ANALYTIC
| functions: min(int_col)
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: int_col ASC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col
|
01:SORT
| order by: int_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:ANALYTIC
| functions: min(int_col)
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC
|
03:SORT
| order by: int_col ASC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col
|
01:SORT
| order by: int_col ASC NULLS FIRST
|
05:EXCHANGE [HASH(int_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# coalesce sort groups
select
# sort group 1
max(int_col) over(partition by bool_col order by bigint_col, tinyint_col),
max(int_col) over(partition by bool_col order by bigint_col),
max(int_col) over(partition by bool_col),
# sort group 2
max(int_col) over(partition by int_col order by bigint_col),
# sort group 3
max(int_col) over(partition by int_col order by bigint_col desc)
from functional.alltypes
---- PLAN
PLAN-ROOT SINK
|
08:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
|
07:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: bigint_col ASC, tinyint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: bool_col ASC NULLS FIRST, bigint_col ASC, tinyint_col ASC
|
04:ANALYTIC
| functions: max(int_col)
| partition by: int_col
| order by: bigint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: int_col ASC NULLS FIRST, bigint_col DESC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC NULLS FIRST, bigint_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:EXCHANGE [UNPARTITIONED]
|
08:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
|
07:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: bigint_col ASC, tinyint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: bool_col ASC NULLS FIRST, bigint_col ASC, tinyint_col ASC
|
10:EXCHANGE [HASH(bool_col)]
|
04:ANALYTIC
| functions: max(int_col)
| partition by: int_col
| order by: bigint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: int_col ASC NULLS FIRST, bigint_col DESC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC NULLS FIRST, bigint_col ASC
|
09:EXCHANGE [HASH(int_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# check ordering of partition, sort and window groups
select
# non-partitioning group with 2 sort groups
# 2nd sort group
min(int_col) over(order by bigint_col),
max(int_col) over(order by bigint_col),
# 1st sort group
min(int_col) over(),
# 2nd partition group, with two sort groups
# 2nd sort group
max(int_col) over(partition by bool_col order by bigint_col),
min(int_col) over(partition by bool_col order by bigint_col),
count(int_col) over(partition by bool_col order by bigint_col),
# 1st sort group
max(int_col) over(partition by bool_col order by int_col),
# 1st partition group, with two sort groups
# 2nd sort group
max(int_col) over(partition by int_col, smallint_col order by bigint_col),
min(int_col) over(partition by int_col, smallint_col order by bigint_col),
# 1st sort group
max(int_col) over(partition by int_col, smallint_col order by int_col)
from functional.alltypes
---- PLAN
PLAN-ROOT SINK
|
11:ANALYTIC
| functions: min(int_col), max(int_col)
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
10:ANALYTIC
| functions: min(int_col)
|
09:SORT
| order by: bigint_col ASC
|
08:ANALYTIC
| functions: max(int_col), min(int_col), count(int_col)
| partition by: bool_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
07:SORT
| order by: bool_col ASC NULLS FIRST, bigint_col ASC
|
06:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: bool_col ASC NULLS FIRST, int_col ASC
|
04:ANALYTIC
| functions: max(int_col), min(int_col)
| partition by: int_col, smallint_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, bigint_col ASC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col, smallint_col
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:ANALYTIC
| functions: min(int_col), max(int_col)
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
10:ANALYTIC
| functions: min(int_col)
|
14:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
09:SORT
| order by: bigint_col ASC
|
08:ANALYTIC
| functions: max(int_col), min(int_col), count(int_col)
| partition by: bool_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
07:SORT
| order by: bool_col ASC NULLS FIRST, bigint_col ASC
|
06:ANALYTIC
| functions: max(int_col)
| partition by: bool_col
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: bool_col ASC NULLS FIRST, int_col ASC
|
13:EXCHANGE [HASH(bool_col)]
|
04:ANALYTIC
| functions: max(int_col), min(int_col)
| partition by: int_col, smallint_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, bigint_col ASC
|
02:ANALYTIC
| functions: max(int_col)
| partition by: int_col, smallint_col
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC
|
12:EXCHANGE [HASH(int_col,smallint_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# basic analytic with default window and no partition/ordering
select count(*) over() from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
01:ANALYTIC
| functions: count(*)
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:ANALYTIC
| functions: count(*)
|
02:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# basic analytic with default window and partition
select tinyint_col, sum(bigint_col) over(partition by tinyint_col) sum_of_bigints
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: sum(bigint_col)
| partition by: tinyint_col
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:ANALYTIC
| functions: sum(bigint_col)
| partition by: tinyint_col
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST
|
03:EXCHANGE [HASH(tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# basic analytic with default window and ordering
select int_col, rank() over(order by int_col) from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: rank()
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: rank()
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC
|
01:SORT
| order by: int_col ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# analytic rows window, partition and ordering using complex expressions, with limit
select bigint_col, count(double_col)
over(partition by tinyint_col + 1, double_col / 2 order by 4 - int_col, 4 * smallint_col
rows between 1 preceding and 1 following)
from functional.alltypesagg
limit 10
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: count(double_col)
| partition by: tinyint_col + 1, double_col / 2
| order by: 4 - int_col ASC, 4 * smallint_col ASC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
| limit: 10
|
01:SORT
| order by: tinyint_col + 1 ASC NULLS FIRST, double_col / 2 ASC NULLS FIRST, 4 - int_col ASC, 4 * smallint_col ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
| limit: 10
|
02:ANALYTIC
| functions: count(double_col)
| partition by: tinyint_col + 1, double_col / 2
| order by: 4 - int_col ASC, 4 * smallint_col ASC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
| limit: 10
|
01:SORT
| order by: tinyint_col + 1 ASC NULLS FIRST, double_col / 2 ASC NULLS FIRST, 4 - int_col ASC, 4 * smallint_col ASC
|
03:EXCHANGE [HASH(tinyint_col + 1,double_col / 2)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# test de-duplication of analytic exprs
select
count(bigint_col)
over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following),
avg(double_col)
over(partition by bool_col order by int_col desc rows between 2 preceding and 2 following),
sum(double_col)
over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following),
# duplicate analytic expr
count(bigint_col)
over(partition by bool_col order by int_col desc rows between 1 preceding and 1 following),
avg(double_col)
over(order by int_col desc rows between 1 preceding and 1 following),
# duplicate analytic expr
avg(double_col)
over(partition by bool_col order by int_col desc rows between 2 preceding and 2 following)
from functional.alltypes
limit 10
---- PLAN
PLAN-ROOT SINK
|
05:ANALYTIC
| functions: avg(double_col)
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
| limit: 10
|
04:SORT
| order by: int_col DESC
|
03:ANALYTIC
| functions: count(bigint_col), sum(double_col)
| partition by: bool_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
02:ANALYTIC
| functions: avg(double_col)
| partition by: bool_col
| order by: int_col DESC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: bool_col ASC NULLS FIRST, int_col DESC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:ANALYTIC
| functions: avg(double_col)
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
| limit: 10
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col DESC
|
04:SORT
| order by: int_col DESC
|
03:ANALYTIC
| functions: count(bigint_col), sum(double_col)
| partition by: bool_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
02:ANALYTIC
| functions: avg(double_col)
| partition by: bool_col
| order by: int_col DESC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: bool_col ASC NULLS FIRST, int_col DESC
|
06:EXCHANGE [HASH(bool_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# analytic on the output of a join with a final order by
select a.tinyint_col, a.int_col, count(a.double_col)
over(partition by a.tinyint_col order by a.int_col desc rows between 1 preceding and 1 following)
from functional.alltypes a inner join functional.alltypessmall b on a.id = b.id
order by a.tinyint_col, a.int_col
---- PLAN
PLAN-ROOT SINK
|
05:SORT
| order by: tinyint_col ASC, int_col ASC
|
04:ANALYTIC
| functions: count(double_col)
| partition by: a.tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, int_col DESC
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: tinyint_col ASC, int_col ASC
|
05:SORT
| order by: tinyint_col ASC, int_col ASC
|
04:ANALYTIC
| functions: count(double_col)
| partition by: a.tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, int_col DESC
|
07:EXCHANGE [HASH(a.tinyint_col)]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
|
|--06:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id
====
# analytics on a grouped aggregation with a final order by
select bool_col,
sum(min(int_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
max(sum(bigint_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
min(sum(bigint_col))
over(partition by min(tinyint_col) order by sum(int_col)
rows between unbounded preceding and 4 following)
from functional.alltypes
group by 1
order by 1, 2, 3
---- PLAN
PLAN-ROOT SINK
|
06:SORT
| order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC
|
05:ANALYTIC
| functions: sum(min(int_col)), max(sum(bigint_col))
| partition by: min(tinyint_col)
| order by: max(int_col) ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
04:SORT
| order by: min(tinyint_col) ASC NULLS FIRST, max(int_col) ASC
|
03:ANALYTIC
| functions: min(sum(bigint_col))
| partition by: min(tinyint_col)
| order by: sum(int_col) ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING
|
02:SORT
| order by: min(tinyint_col) ASC NULLS FIRST, sum(int_col) ASC
|
01:AGGREGATE [FINALIZE]
| output: min(int_col), min(tinyint_col), max(int_col), sum(bigint_col), sum(int_col)
| group by: bool_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC
|
06:SORT
| order by: bool_col ASC, sum(min(int_col)) ASC, max(sum(bigint_col)) ASC
|
05:ANALYTIC
| functions: sum(min(int_col)), max(sum(bigint_col))
| partition by: min(tinyint_col)
| order by: max(int_col) ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
04:SORT
| order by: min(tinyint_col) ASC NULLS FIRST, max(int_col) ASC
|
03:ANALYTIC
| functions: min(sum(bigint_col))
| partition by: min(tinyint_col)
| order by: sum(int_col) ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING
|
02:SORT
| order by: min(tinyint_col) ASC NULLS FIRST, sum(int_col) ASC
|
09:EXCHANGE [HASH(min(tinyint_col))]
|
08:AGGREGATE [FINALIZE]
| output: min:merge(int_col), min:merge(tinyint_col), max:merge(int_col), sum:merge(bigint_col), sum:merge(int_col)
| group by: bool_col
|
07:EXCHANGE [HASH(bool_col)]
|
01:AGGREGATE [STREAMING]
| output: min(int_col), min(tinyint_col), max(int_col), sum(bigint_col), sum(int_col)
| group by: bool_col
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# grouping of multiple analytic exprs by compatible window/partition/order;
# the distributed plan repartitions only once on tinyint_col
select
# sort group 1
count(double_col)
over(partition by tinyint_col, double_col order by int_col desc
rows between 1 preceding and 1 following),
last_value(double_col)
over(partition by double_col, tinyint_col order by int_col desc
rows between 1 preceding and 1 following),
# compatible default RANGE window
sum(tinyint_col)
over(partition by tinyint_col, double_col order by int_col desc),
# compatible RANGE window
# TODO: Revert window when RANGE offsets are supported
sum(smallint_col)
over(partition by double_col, tinyint_col order by int_col desc
range between unbounded preceding and unbounded following),
# range between 1 preceding and 1 following),
# different window
first_value(int_col)
over(partition by double_col, tinyint_col order by int_col desc
rows between 2 preceding and 2 following),
# sort group 2
last_value(int_col ignore nulls)
over(partition by double_col, tinyint_col order by int_col asc
rows between 2 preceding and 2 following),
# different partition
first_value(int_col)
over(partition by double_col, tinyint_col order by int_col asc
rows between 2 preceding and 2 following),
# same partition group but different sort group
min(int_col)
over(partition by tinyint_col order by int_col desc
rows between unbounded preceding and 2 following)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
10:ANALYTIC
| functions: count(double_col), last_value(double_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
09:ANALYTIC
| functions: sum(smallint_col)
| partition by: double_col, tinyint_col
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
08:ANALYTIC
| functions: sum(tinyint_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
07:ANALYTIC
| functions: first_value_rewrite(int_col, -1)
| partition by: double_col, tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING
|
06:SORT
| order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC
|
05:ANALYTIC
| functions: first_value_rewrite(int_col, -1)
| partition by: double_col, tinyint_col
| order by: int_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING
|
04:ANALYTIC
| functions: last_value_ignore_nulls(int_col)
| partition by: double_col, tinyint_col
| order by: int_col ASC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
03:SORT
| order by: double_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, int_col ASC
|
02:ANALYTIC
| functions: min(int_col)
| partition by: tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, int_col DESC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
12:EXCHANGE [UNPARTITIONED]
|
10:ANALYTIC
| functions: count(double_col), last_value(double_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
09:ANALYTIC
| functions: sum(smallint_col)
| partition by: double_col, tinyint_col
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
08:ANALYTIC
| functions: sum(tinyint_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
07:ANALYTIC
| functions: first_value_rewrite(int_col, -1)
| partition by: double_col, tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING
|
06:SORT
| order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC
|
05:ANALYTIC
| functions: first_value_rewrite(int_col, -1)
| partition by: double_col, tinyint_col
| order by: int_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 PRECEDING
|
04:ANALYTIC
| functions: last_value_ignore_nulls(int_col)
| partition by: double_col, tinyint_col
| order by: int_col ASC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
03:SORT
| order by: double_col ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, int_col ASC
|
02:ANALYTIC
| functions: min(int_col)
| partition by: tinyint_col
| order by: int_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, int_col DESC
|
11:EXCHANGE [HASH(tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# grouping of multiple analytic exprs by compatible window/partition/order
select
count(double_col)
over(partition by tinyint_col, double_col order by int_col desc
rows between 1 preceding and 1 following),
# unpartitioned default RANGE window
sum(tinyint_col)
over(order by int_col desc),
# partition compatible with first analytic expr but no order by
sum(smallint_col)
over(partition by double_col, tinyint_col),
# incompatible analytic expr
sum(smallint_col)
over(partition by bigint_col order by tinyint_col
rows between 2 preceding and 2 following)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
07:ANALYTIC
| functions: sum(tinyint_col)
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:SORT
| order by: int_col DESC
|
05:ANALYTIC
| functions: sum(smallint_col)
| partition by: double_col, tinyint_col
|
04:ANALYTIC
| functions: count(double_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC
|
02:ANALYTIC
| functions: sum(smallint_col)
| partition by: bigint_col
| order by: tinyint_col ASC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: bigint_col ASC NULLS FIRST, tinyint_col ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:ANALYTIC
| functions: sum(tinyint_col)
| order by: int_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
10:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col DESC
|
06:SORT
| order by: int_col DESC
|
05:ANALYTIC
| functions: sum(smallint_col)
| partition by: double_col, tinyint_col
|
04:ANALYTIC
| functions: count(double_col)
| partition by: tinyint_col, double_col
| order by: int_col DESC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, double_col ASC NULLS FIRST, int_col DESC
|
09:EXCHANGE [HASH(tinyint_col,double_col)]
|
02:ANALYTIC
| functions: sum(smallint_col)
| partition by: bigint_col
| order by: tinyint_col ASC
| window: ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
01:SORT
| order by: bigint_col ASC NULLS FIRST, tinyint_col ASC
|
08:EXCHANGE [HASH(bigint_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# basic test for analytics and inline views
select double_col, a, b, a + b, double_col + a from
(select
double_col,
count(int_col) over() a,
sum(int_col + bigint_col) over(partition by bool_col) b
from
(select * from functional.alltypes) v1) v2
order by 2, 3, 4
---- PLAN
PLAN-ROOT SINK
|
04:SORT
| order by: a ASC, b ASC, a + b ASC
|
03:ANALYTIC
| functions: count(int_col)
|
02:ANALYTIC
| functions: sum(int_col + bigint_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:SORT
| order by: a ASC, b ASC, a + b ASC
|
03:ANALYTIC
| functions: count(int_col)
|
06:EXCHANGE [UNPARTITIONED]
|
02:ANALYTIC
| functions: sum(int_col + bigint_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
05:EXCHANGE [HASH(functional.alltypes.bool_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# same as above but using a WITH-clause view
with v2 as
(select
double_col,
count(int_col) over() a,
sum(int_col + bigint_col) over(partition by bool_col) b
from
(select * from functional.alltypes) v1)
select double_col, a, b, a + b, double_col + a from v2
order by 2, 3, 4
---- PLAN
PLAN-ROOT SINK
|
04:SORT
| order by: a ASC, b ASC, a + b ASC
|
03:ANALYTIC
| functions: count(int_col)
|
02:ANALYTIC
| functions: sum(int_col + bigint_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:SORT
| order by: a ASC, b ASC, a + b ASC
|
03:ANALYTIC
| functions: count(int_col)
|
06:EXCHANGE [UNPARTITIONED]
|
02:ANALYTIC
| functions: sum(int_col + bigint_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
05:EXCHANGE [HASH(functional.alltypes.bool_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# test ignoring of non-materialized analytic exprs
select b from
(select
count(int_col) over(order by id) a,
sum(int_col) over(partition by bigint_col) b,
max(tinyint_col) over() c,
min(double_col) over(partition by bool_col order by string_col) d,
count(1) over(partition by bool_col order by string_col
rows between unbounded preceding and 1 following) e
from functional.alltypes) v
where e < 10
---- PLAN
PLAN-ROOT SINK
|
05:SELECT
| predicates: count(1) < 10
|
04:ANALYTIC
| functions: count(1)
| partition by: bool_col
| order by: string_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: bool_col ASC NULLS FIRST, string_col ASC
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
|
01:SORT
| order by: bigint_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
05:SELECT
| predicates: count(1) < 10
|
04:ANALYTIC
| functions: count(1)
| partition by: bool_col
| order by: string_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: bool_col ASC NULLS FIRST, string_col ASC
|
07:EXCHANGE [HASH(bool_col)]
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
|
01:SORT
| order by: bigint_col ASC NULLS FIRST
|
06:EXCHANGE [HASH(bigint_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# basic test for analytics and unions
select min(id) over (partition by int_col)
from functional.alltypes
union distinct
select max(id) over (partition by bool_col)
from functional.alltypessmall
union all
(select sum(bigint_col) over (partition by int_col order by id)
from functional.alltypestiny)
order by 1 desc nulls first
---- PLAN
PLAN-ROOT SINK
|
12:SORT
| order by: min(id) OVER(...) DESC NULLS FIRST
|
08:UNION
| pass-through-operands: 07
|
|--11:ANALYTIC
| | functions: sum(bigint_col)
| | partition by: int_col
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 10:SORT
| | order by: int_col ASC NULLS FIRST, id ASC
| |
| 09:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
|
07:AGGREGATE [FINALIZE]
| group by: min(id) OVER(...)
|
00:UNION
|
|--06:ANALYTIC
| | functions: max(id)
| | partition by: bool_col
| |
| 05:SORT
| | order by: bool_col ASC NULLS FIRST
| |
| 04:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
|
03:ANALYTIC
| functions: min(id)
| partition by: int_col
|
02:SORT
| order by: int_col ASC NULLS FIRST
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
18:MERGING-EXCHANGE [UNPARTITIONED]
| order by: min(id) OVER(...) DESC NULLS FIRST
|
12:SORT
| order by: min(id) OVER(...) DESC NULLS FIRST
|
08:UNION
| pass-through-operands: 16
|
|--11:ANALYTIC
| | functions: sum(bigint_col)
| | partition by: int_col
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 10:SORT
| | order by: int_col ASC NULLS FIRST, id ASC
| |
| 17:EXCHANGE [HASH(int_col)]
| |
| 09:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
|
16:AGGREGATE [FINALIZE]
| group by: min(id) OVER(...)
|
15:EXCHANGE [HASH(min(id) OVER(...))]
|
07:AGGREGATE [STREAMING]
| group by: min(id) OVER(...)
|
00:UNION
|
|--06:ANALYTIC
| | functions: max(id)
| | partition by: bool_col
| |
| 05:SORT
| | order by: bool_col ASC NULLS FIRST
| |
| 14:EXCHANGE [HASH(bool_col)]
| |
| 04:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
|
03:ANALYTIC
| functions: min(id)
| partition by: int_col
|
02:SORT
| order by: int_col ASC NULLS FIRST
|
13:EXCHANGE [HASH(int_col)]
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# analytics in an uncorrelated subquery
select id, int_col, bool_col from functional.alltypessmall t1
where int_col in
(select min(bigint_col) over(partition by bool_col)
from functional.alltypestiny t2 where t2.id < 10)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = min(bigint_col)
| runtime filters: RF000 <- min(bigint_col)
|
|--03:ANALYTIC
| | functions: min(bigint_col)
| | partition by: bool_col
| |
| 02:SORT
| | order by: bool_col ASC NULLS FIRST
| |
| 01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 files=4 size=460B
| predicates: t2.id < 10
|
00:SCAN HDFS [functional.alltypessmall t1]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: int_col = min(bigint_col)
| runtime filters: RF000 <- min(bigint_col)
|
|--06:EXCHANGE [BROADCAST]
| |
| 03:ANALYTIC
| | functions: min(bigint_col)
| | partition by: bool_col
| |
| 02:SORT
| | order by: bool_col ASC NULLS FIRST
| |
| 05:EXCHANGE [HASH(bool_col)]
| |
| 01:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 files=4 size=460B
| predicates: t2.id < 10
|
00:SCAN HDFS [functional.alltypessmall t1]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> int_col
====
# test conjunct assignment and slot materialization due to conjuncts
# (see IMPALA-1243)
select 1 from
(select bigint_col,
min(int_col) over() a,
max(int_col) over(partition by bool_col) b,
count(int_col) over(partition by bool_col) c,
sum(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 1 following) d,
avg(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 2 following) e
from functional.alltypes
# assigned in scan
where int_col between 5 and 10) v
where
# assigned to separate Select node
v.bigint_col > 10 and
v.a < 1 and
v.a < v.bigint_col + 1 and
v.b < 2 and
v.b < v.bigint_col + 2 and
v.c < 3 and
v.c < v.bigint_col + 3 and
v.d < 4 and
v.d < v.bigint_col + 4 and
v.e < 5 and
v.e < v.bigint_col + 5 and
v.a != v.c and
v.a != v.e and
v.b != v.c
---- PLAN
PLAN-ROOT SINK
|
07:SELECT
| predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
|
06:ANALYTIC
| functions: min(int_col)
|
05:ANALYTIC
| functions: avg(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING
|
04:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: bigint_col ASC NULLS FIRST, id ASC
|
02:ANALYTIC
| functions: max(int_col), count(int_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col <= 10, int_col >= 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:SELECT
| predicates: min(int_col) < 1, max(int_col) < 2, bigint_col > 10, count(int_col) < 3, sum(int_col) < 4, avg(int_col) < 5, min(int_col) != count(int_col), min(int_col) != avg(int_col), max(int_col) != count(int_col), count(int_col) < bigint_col + 3, sum(int_col) < bigint_col + 4, min(int_col) < bigint_col + 1, max(int_col) < bigint_col + 2, avg(int_col) < bigint_col + 5
|
06:ANALYTIC
| functions: min(int_col)
|
10:EXCHANGE [UNPARTITIONED]
|
05:ANALYTIC
| functions: avg(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING
|
04:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
03:SORT
| order by: bigint_col ASC NULLS FIRST, id ASC
|
09:EXCHANGE [HASH(bigint_col)]
|
02:ANALYTIC
| functions: max(int_col), count(int_col)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
08:EXCHANGE [HASH(bool_col)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: int_col <= 10, int_col >= 5
====
# test predicate propagation onto and through analytic nodes
# TODO: allow AnalyticEvalNode to apply a < 20
select 1 from
(select id, int_col, bigint_col,
sum(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 1 following) a,
avg(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 2 following) b
from functional.alltypes) t1
inner join functional.alltypes t2
on (t1.id = t2.id and t1.a = t2.int_col)
where t2.id < 10 and t2.int_col < 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: id = t2.id, sum(int_col) = t2.int_col
|
|--03:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
| predicates: t2.id < 10, t2.int_col < 20
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
01:SORT
| order by: bigint_col ASC NULLS FIRST, id ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# test that predicates are correctly propagated in the presence of outer joins
# (i.e., no predicates should be propagated in this query)
select 1 from
(select id, int_col, bigint_col,
sum(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 1 following) a,
avg(int_col) over(partition by bigint_col order by id
rows between unbounded preceding and 2 following) b
from functional.alltypes) t1
left outer join functional.alltypes t2
on (t1.id = t2.id and t1.a = t2.int_col)
where t2.id < 10 and t2.int_col < 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: id = t2.id, sum(int_col) = t2.int_col
| other predicates: t2.id < 10, t2.int_col < 20
|
|--03:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
| predicates: t2.id < 10, t2.int_col < 20
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: bigint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
01:SORT
| order by: bigint_col ASC NULLS FIRST, id ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# test canonical function/window/order: row_number() gets a ROWS window
select
row_number() over(partition by tinyint_col order by id)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: row_number()
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# test canonical function/window/order: lead() and lag() have default
# arguments explicitly set
select
lead(int_col) over(partition by tinyint_col order by id),
lag(int_col) over(partition by tinyint_col order by id),
lead(int_col, 4) over(partition by smallint_col order by id),
lag(int_col, 4) over(partition by smallint_col order by id),
lead(int_col, 8, 20) over(partition by int_col order by id),
lag(int_col, 8, 20) over(partition by int_col order by id)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
09:ANALYTIC
| functions: lag(int_col, 8, 20)
| partition by: int_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 8 PRECEDING
|
08:ANALYTIC
| functions: lead(int_col, 8, 20)
| partition by: int_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 8 FOLLOWING
|
07:SORT
| order by: int_col ASC NULLS FIRST, id ASC
|
06:ANALYTIC
| functions: lag(int_col, 4, NULL)
| partition by: smallint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 PRECEDING
|
05:ANALYTIC
| functions: lead(int_col, 4, NULL)
| partition by: smallint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 4 FOLLOWING
|
04:SORT
| order by: smallint_col ASC NULLS FIRST, id ASC
|
03:ANALYTIC
| functions: lag(int_col, 1, NULL)
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
|
02:ANALYTIC
| functions: lead(int_col, 1, NULL)
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING
# and not starting with UNBOUNDED PRECEDING.
select
# windows and sort order should be reversed
sum(int_col) over(partition by tinyint_col order by id desc nulls first, bool_col asc nulls last
rows between 2 preceding and unbounded following),
# TODO: Revert window when RANGE offsets are supported
sum(int_col) over(partition by tinyint_col order by id asc nulls last
range between current row and unbounded following),
# range between 4 preceding and unbounded following),
# windows and sort order should remain unchanged
count(bigint_col) over(partition by tinyint_col order by id),
count(bigint_col) over(partition by tinyint_col order by id, int_col
rows between 6 preceding and 8 following),
# TODO: Revert window when RANGE offsets are supported
count(bigint_col) over(partition by tinyint_col order by id
range between unbounded preceding and unbounded following)
# range between unbounded preceding and 10 following)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
08:ANALYTIC
| functions: count(bigint_col)
| partition by: tinyint_col
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
07:ANALYTIC
| functions: count(bigint_col)
| partition by: tinyint_col
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:ANALYTIC
| functions: sum(int_col)
| partition by: tinyint_col
| order by: id ASC, bool_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING
|
05:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST
|
04:ANALYTIC
| functions: count(bigint_col)
| partition by: tinyint_col
| order by: id ASC, int_col ASC
| window: ROWS BETWEEN 6 PRECEDING AND 8 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: tinyint_col
| order by: id DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING
# and either not starting with UNBOUNDED PRECEDING or first_value(... IGNORE NULLS), and
# change first_value() to last_value().
# Set the end boundary to CURRENT_ROW for first_value() if the start boundary is
# UNBOUNDED_PRECEDING and not IGNORE NULLS.
select
# windows, sort order and function should be reversed
last_value(int_col) over(partition by tinyint_col order by id desc nulls first, bool_col asc nulls last
rows between 2 preceding and unbounded following),
# TODO: Revert window when RANGE offsets are supported
last_value(int_col) over(partition by tinyint_col order by id asc nulls last
range between current row and unbounded following),
# windows, sort order and function should remain unchanged
first_value(bigint_col ignore nulls) over(partition by tinyint_col order by id),
first_value(bigint_col) over(partition by tinyint_col order by id, int_col
rows between 6 preceding and 8 following),
# TODO: Revert window when RANGE offsets are supported
first_value(bigint_col) over(partition by tinyint_col order by id
range between unbounded preceding and unbounded following),
# window, order, and function should be reversed
first_value(tinyint_col ignore nulls) over (order by id
rows between 1 following and 2 following)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
09:ANALYTIC
| functions: last_value_ignore_nulls(tinyint_col)
| order by: id DESC
| window: ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING
|
08:SORT
| order by: id DESC NULLS FIRST
|
07:ANALYTIC
| functions: first_value_ignore_nulls(bigint_col), first_value(bigint_col)
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:ANALYTIC
| functions: first_value(int_col)
| partition by: tinyint_col
| order by: id ASC, bool_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST
|
04:ANALYTIC
| functions: first_value_rewrite(bigint_col, -1)
| partition by: tinyint_col
| order by: id ASC, int_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 6 PRECEDING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC
|
02:ANALYTIC
| functions: first_value(int_col)
| partition by: tinyint_col
| order by: id DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# Test canonical function/window/order: Reverse windows ending in UNBOUNDED FOLLOWING
# and not starting with UNBOUNDED PRECEDING, and change last_value() to first_value()
select
# windows, sort order and function should be reversed
last_value(int_col) over(partition by tinyint_col order by id desc nulls first,
bool_col asc nulls last
rows between 2 preceding and unbounded following),
# TODO: Revert window when RANGE offsets are supported
last_value(int_col) over(partition by tinyint_col order by id asc nulls last
range between current row and unbounded following),
# windows, sort order and function should remain unchanged
last_value(bigint_col) over(partition by tinyint_col order by id),
last_value(bigint_col) over(partition by tinyint_col order by id, int_col
rows between 6 preceding and 8 following),
# TODO: Revert window when RANGE offsets are supported
last_value(bigint_col) over(partition by tinyint_col order by id
range between unbounded preceding and unbounded following)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
08:ANALYTIC
| functions: last_value(bigint_col)
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
07:ANALYTIC
| functions: last_value(bigint_col)
| partition by: tinyint_col
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
06:ANALYTIC
| functions: first_value(int_col)
| partition by: tinyint_col
| order by: id ASC, bool_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC NULLS LAST, bool_col DESC NULLS FIRST
|
04:ANALYTIC
| functions: last_value(bigint_col)
| partition by: tinyint_col
| order by: id ASC, int_col ASC
| window: ROWS BETWEEN 6 PRECEDING AND 8 FOLLOWING
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC, int_col ASC
|
02:ANALYTIC
| functions: first_value(int_col)
| partition by: tinyint_col
| order by: id DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: tinyint_col ASC NULLS FIRST, id DESC NULLS FIRST
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# IMPALA-1229
select DENSE_RANK() OVER (ORDER BY t1.day ASC)
FROM functional.alltypesagg t1
WHERE EXISTS (SELECT t1.year AS int_col_1 FROM functional.alltypesagg t1)
---- PLAN
PLAN-ROOT SINK
|
04:ANALYTIC
| functions: dense_rank()
| order by: day ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: day ASC
|
02:NESTED LOOP JOIN [LEFT SEMI JOIN]
|
|--01:SCAN HDFS [functional.alltypesagg t1]
| partitions=11/11 files=11 size=814.73KB
| limit: 1
|
00:SCAN HDFS [functional.alltypesagg t1]
partitions=11/11 files=11 size=814.73KB
====
# IMPALA-1243: the Where clause predicate needs to be evaluated in a Select node
# not as a scan predicate of alltypes
select COUNT(*)
FROM (
SELECT id, tinyint_col
FROM functional.alltypestiny t1
UNION ALL
SELECT DENSE_RANK() OVER (ORDER BY t1.id), tinyint_col
FROM functional.alltypes t1 ) t1
WHERE id IS NULL and tinyint_col != 5
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
|
05:SELECT
| predicates: id IS NULL, tinyint_col != 5
|
00:UNION
|
|--04:ANALYTIC
| | functions: dense_rank()
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 03:SORT
| | order by: id ASC
| |
| 02:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 files=4 size=460B
predicates: t1.id IS NULL, t1.tinyint_col != 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: count:merge(*)
|
09:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE
| output: count(*)
|
05:SELECT
| predicates: id IS NULL, tinyint_col != 5
|
00:UNION
|
|--08:EXCHANGE [RANDOM]
| |
| 04:ANALYTIC
| | functions: dense_rank()
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 07:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: id ASC
| |
| 03:SORT
| | order by: id ASC
| |
| 02:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 files=4 size=460B
predicates: t1.id IS NULL, t1.tinyint_col != 5
====
# Propagate a predicate on a partition key through an inline view that has an analytic
# function. Predicates that are not compatible with analytic function's partition by
# clause are not propagated (IMPALA-1900).
select * from
(select id, int_col, year,
sum(int_col) over(partition by year order by id) as s
from functional.alltypes) v
where year = 2009 and id = 1 and int_col < 10 and s = 4
---- PLAN
PLAN-ROOT SINK
|
03:SELECT
| predicates: id = 1, int_col < 10, sum(int_col) = 4
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: year
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: year ASC NULLS FIRST, id ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=12/24 files=12 size=238.68KB
====
# Propagate predicates through an inline view that computes multiple analytic functions
# (IMPALA-1900)
select * from
(select year, tinyint_col,
last_value(int_col) over(partition by int_col, year order by id
rows between 1 preceding and 1 following),
last_value(tinyint_col) over(partition by id, year order by int_col
range between unbounded preceding and unbounded following),
sum(int_col) over(partition by year, tinyint_col),
avg(int_col) over(partition by tinyint_col, id, year order by bigint_col)
from functional.alltypes) v
where year = 2009 and tinyint_col + 1 = 1
---- PLAN
PLAN-ROOT SINK
|
09:SELECT
| predicates: tinyint_col + 1 = 1
|
08:ANALYTIC
| functions: sum(int_col)
| partition by: year, tinyint_col
|
07:SORT
| order by: year ASC NULLS FIRST, tinyint_col ASC NULLS FIRST
|
06:ANALYTIC
| functions: last_value(int_col)
| partition by: int_col, year
| order by: id ASC
| window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
|
05:SORT
| order by: int_col ASC NULLS FIRST, year ASC NULLS FIRST, id ASC
|
04:ANALYTIC
| functions: avg(int_col)
| partition by: tinyint_col, id, year
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: tinyint_col ASC NULLS FIRST, id ASC NULLS FIRST, year ASC NULLS FIRST, bigint_col ASC
|
02:ANALYTIC
| functions: last_value(tinyint_col)
| partition by: id, year
| order by: int_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
01:SORT
| order by: id ASC NULLS FIRST, year ASC NULLS FIRST, int_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=12/24 files=12 size=238.68KB
====
# Don't propagate predicates through an inline view with multiple analytic functions
# if the predicates are not compatible with every analytic function's partition by
# clause (IMPALA-1900)
select * from
(select year, tinyint_col,
sum(int_col) over(partition by year, tinyint_col order by bigint_col),
avg(int_col) over(partition by year order by bigint_col),
lead(int_col) over(order by tinyint_col)
from functional.alltypes) v
where year = 2009 and tinyint_col = 1
---- PLAN
PLAN-ROOT SINK
|
07:SELECT
| predicates: tinyint_col = 1, year = 2009
|
06:ANALYTIC
| functions: lead(int_col, 1, NULL)
| order by: tinyint_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING
|
05:SORT
| order by: tinyint_col ASC
|
04:ANALYTIC
| functions: avg(int_col)
| partition by: year
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: year ASC NULLS FIRST, bigint_col ASC
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: year, tinyint_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: year ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, bigint_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# Propagate a predicate generated from equivalence classes
# through an inline with an analytic function (IMPALA-1900)
select * from
(select month, int_col, tinyint_col,
sum(id) over(partition by month, tinyint_col order by bigint_col)
from functional.alltypestiny where id = tinyint_col) v
where month = int_col and int_col = 1 and tinyint_col = 1
---- PLAN
PLAN-ROOT SINK
|
03:SELECT
| predicates: month = int_col
|
02:ANALYTIC
| functions: sum(id)
| partition by: month, tinyint_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: month ASC NULLS FIRST, tinyint_col ASC NULLS FIRST, bigint_col ASC
|
00:SCAN HDFS [functional.alltypestiny]
partitions=1/4 files=1 size=115B
predicates: functional.alltypestiny.id = 1, functional.alltypestiny.tinyint_col = 1
====
# Don't propagate predicates through an inline view with an analytic
# function that has a complex (non SlotRef) partition by clause for consistency with
# the group by behavior (IMPALA-1900).
# TODO: Enable predicate propagation through inline views with complex partition by
# exprs.
select * from
(select t1.tinyint_col as x, t2.int_col as y,
sum(t1.id) over(partition by t1.tinyint_col + 1, t2.int_col - 1 order by t1.bigint_col)
from functional.alltypes t1 inner join functional.alltypesagg t2
on t1.id = t2.id) v
where v.x + v.y < 10
---- PLAN
PLAN-ROOT SINK
|
05:SELECT
| predicates: tinyint_col + int_col < 10
|
04:ANALYTIC
| functions: sum(id)
| partition by: t1.tinyint_col + 1, t2.int_col - 1
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
03:SORT
| order by: tinyint_col + 1 ASC NULLS FIRST, int_col - 1 ASC NULLS FIRST, bigint_col ASC
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--01:SCAN HDFS [functional.alltypesagg t2]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.id
====
# Don't propagate a predicate through an inline view with an analytic function
# when the select list items contain a complex (non SlotRef) expr on a partition by
# expr (IMPALA-1900)
# TODO: Enable migration of predicates into inline views with analytics if the predicate
# is compatible with the analytics' partition by clause.
select * from
(select int_col + 1 as x,
sum(id) over(partition by int_col order by bigint_col)
from functional.alltypestiny) v
where x = 1
---- PLAN
PLAN-ROOT SINK
|
03:SELECT
| predicates: int_col + 1 = 1
|
02:ANALYTIC
| functions: sum(id)
| partition by: int_col
| order by: bigint_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: int_col ASC NULLS FIRST, bigint_col ASC
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
# IMPALA-1519: Check that the first analytic sort of a select block
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
select
sum(t1.id) over (partition by t1.bool_col),
count(1) over (order by t1.int_col),
avg(g) over (order by f),
t2.a,
t2.d
from functional.alltypestiny t1
left outer join
(select
id as a,
coalesce(id, 10) as b,
int_col as c,
coalesce(int_col, 20) as d,
bigint_col e,
coalesce(bigint_col, 30) as f,
coalesce(id + bigint_col, 40) as g
from functional.alltypestiny) t2
on (t1.id = t2.a + 100)
---- PLAN
PLAN-ROOT SINK
|
08:ANALYTIC
| functions: avg(if(TupleIsNull(), NULL, coalesce(id + bigint_col, 40)))
| order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
07:SORT
| order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC
|
06:ANALYTIC
| functions: count(1)
| order by: int_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: int_col ASC
|
04:ANALYTIC
| functions: sum(id)
| partition by: t1.bool_col
|
03:SORT
| order by: bool_col ASC NULLS FIRST
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id + 100 = t1.id
| runtime filters: RF000 <- t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> id + 100
====
# IMPALA-1519: Check that the first analytic sort of a select block
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
# Same test as the one above, but with an aggregation on top.
select avg(af1), sum(af3), count(a)
from
(select
sum(t1.id) over (partition by t1.bool_col) af1,
count(1) over (order by t1.int_col) af2,
avg(g) over (order by f) af3,
t2.a,
t2.d
from functional.alltypestiny t1
left outer join
(select
id as a,
coalesce(id, 10) as b,
int_col as c,
coalesce(int_col, 20) as d,
bigint_col e,
coalesce(bigint_col, 30) as f,
coalesce(id + bigint_col, 40) as g
from functional.alltypestiny) t2
on (t1.id = t2.a + 100)) t3
group by d
---- PLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: avg(sum(t1.id)), sum(avg(g)), count(id)
| group by: if(TupleIsNull(), NULL, coalesce(int_col, 20))
|
06:ANALYTIC
| functions: avg(if(TupleIsNull(), NULL, coalesce(id + bigint_col, 40)))
| order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
05:SORT
| order by: if(TupleIsNull(), NULL, coalesce(bigint_col, 30)) ASC
|
04:ANALYTIC
| functions: sum(id)
| partition by: t1.bool_col
|
03:SORT
| order by: bool_col ASC NULLS FIRST
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id + 100 = t1.id
| runtime filters: RF000 <- t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> id + 100
====
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
# is performed correctly with analytics and multiple nesting levels.
# The test covers these cases:
# - analytic output that needs to be wrapped
# - analytic input that needs to be wrapped
# - both cases above with and without order by in the over() clause
# - exprs that need to be wrapped multiple times due to multiple outer joins
select * from
(select
a.id,
sum(x) over (partition by a.id) as x,
ifnull(y, 10) as y,
ifnull(z, "b") as z
from functional.alltypestiny a
left outer join
(select id,
ifnull(int_col, 1) x,
count(bigint_col) over(partition by id) y,
ifnull(string_col, "a") z
from functional.alltypestiny b) v1
on (a.id = v1.id)) v2
full outer join
(select
c.id,
sum(x) over (order by c.id) as x,
ifnull(y, 10) as y,
ifnull(z, "b") as z
from functional.alltypestiny c
left outer join
(select id,
ifnull(int_col, 1) x,
count(bigint_col) over(order by id) y,
ifnull(string_col, "a") z
from functional.alltypestiny d) v3
on (c.id = v3.id)) v4
on (v2.id = v4.id)
---- PLAN
PLAN-ROOT SINK
|
14:HASH JOIN [FULL OUTER JOIN]
| hash predicates: id = id
|
|--13:ANALYTIC
| | functions: sum(if(TupleIsNull(), NULL, ifnull(int_col, 1)))
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 12:SORT
| | order by: id ASC
| |
| 11:HASH JOIN [RIGHT OUTER JOIN]
| | hash predicates: id = c.id
| |
| |--07:SCAN HDFS [functional.alltypestiny c]
| | partitions=4/4 files=4 size=460B
| |
| 10:ANALYTIC
| | functions: count(bigint_col)
| | order by: id ASC
| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| |
| 09:SORT
| | order by: id ASC
| |
| 08:SCAN HDFS [functional.alltypestiny d]
| partitions=4/4 files=4 size=460B
|
06:ANALYTIC
| functions: sum(if(TupleIsNull(), NULL, ifnull(int_col, 1)))
| partition by: a.id
|
05:SORT
| order by: id ASC NULLS FIRST
|
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id = a.id
|
|--00:SCAN HDFS [functional.alltypestiny a]
| partitions=4/4 files=4 size=460B
|
03:ANALYTIC
| functions: count(bigint_col)
| partition by: id
|
02:SORT
| order by: id ASC NULLS FIRST
|
01:SCAN HDFS [functional.alltypestiny b]
partitions=4/4 files=4 size=460B
====
# IMPALA-1946: Check that On-clause predicates of an outer join assigned in a scan
# are not wrapped in TupleIsNullPredicates.
select /* +straight_join */ a.id, b.id
from functional.alltypestiny a
left outer join
(select t1.id, ifnull(t1.int_col, 10) as int_col
from functional.alltypestiny t1
inner join functional.alltypestiny t2
on (t1.id = t2.id)) b
on (a.id = b.id and b.int_col < 10)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = t1.id
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: t2.id = t1.id
| | runtime filters: RF000 <- t1.id
| |
| |--01:SCAN HDFS [functional.alltypestiny t1]
| | partitions=4/4 files=4 size=460B
| | predicates: ifnull(t1.int_col, 10) < 10
| |
| 02:SCAN HDFS [functional.alltypestiny t2]
| partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.id
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
====
# IMPALA-2832: Test proper cloning of analytic function call exprs in a CTAS.
create table impala_2832 as select
first_value(int_col) over (order by int_col rows between current row and current row),
first_value(bigint_col) over (order by int_col rows between current row and current row)
from functional.alltypes
---- PLAN
WRITE TO HDFS [default.impala_2832, OVERWRITE=false]
| partitions=1
|
02:ANALYTIC
| functions: last_value(int_col), last_value(bigint_col)
| order by: int_col ASC
| window: ROWS BETWEEN CURRENT ROW AND CURRENT ROW
|
01:SORT
| order by: int_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# For first/last_value(), ranges windows get rewritten as rows windows,
# so these should be grouped.
select last_value(int_col) over (order by bigint_col
range between unbounded preceding and current row),
first_value(int_col) over (order by bigint_col
rows between unbounded preceding and current row)
from functional.alltypes
---- PLAN
PLAN-ROOT SINK
|
02:ANALYTIC
| functions: last_value(int_col), first_value(int_col)
| order by: bigint_col ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
01:SORT
| order by: bigint_col ASC
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4263: Analytic function needs a hash exchange because the partition exprs
# reference a tuple that is made nullable in the join fragment.
select /* +straight_join */ count(*) over (partition by t1.id)
from functional.alltypes t1
right outer join /* +shuffle */ functional.alltypessmall t2
on t1.id = t2.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:ANALYTIC
| functions: count(*)
| partition by: t1.id
|
03:SORT
| order by: id ASC NULLS FIRST
|
07:EXCHANGE [HASH(t1.id)]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--06:EXCHANGE [HASH(t2.id)]
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| partitions=4/4 files=4 size=6.32KB
|
05:EXCHANGE [HASH(t1.id)]
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.id
====
# IMPALA-1882: Confirm that first_value function used without a partition by and order
# by clause does not need a sort node
select first_value(tinyint_col ignore nulls) over () from functional.alltypesagg
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:ANALYTIC
| functions: first_value_ignore_nulls(tinyint_col)
|
02:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# IMPALA-1882: Confirm that last_value function used without a partition by and order
# by clause does not need a sort node
select last_value(tinyint_col ignore nulls) over () from functional.alltypesagg
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:ANALYTIC
| functions: last_value_ignore_nulls(tinyint_col)
|
02:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# IMPALA-1882: Confirm that first_value function using only a partition by clause
# sorts over partition column
select *, first_value(id) over (partition by bool_col) first_val from
functional.alltypessmall;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:ANALYTIC
| functions: first_value(id)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
03:EXCHANGE [HASH(bool_col)]
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 files=4 size=6.32KB
====
# IMPALA-1882: Confirm that last_value function using only a partition by clause
# sorts over partition column
select *, last_value(id) over (partition by bool_col) first_val from
functional.alltypessmall;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:ANALYTIC
| functions: last_value(id)
| partition by: bool_col
|
01:SORT
| order by: bool_col ASC NULLS FIRST
|
03:EXCHANGE [HASH(bool_col)]
|
00:SCAN HDFS [functional.alltypessmall]
partitions=4/4 files=4 size=6.32KB
====