blob: f2ecb2c573b7c4cbcf8c797c651e2ecf0f3c6e86 [file] [log] [blame]
# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering
# columns are added to the sort columns.
insert into table test_sort_by.t partition(year, month) /*+ shuffle */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
01:EXCHANGE [HASH(year,month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds
# sort node.
insert into table test_sort_by.t partition(year, month) /*+ noshuffle */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering
# columns are added to the sort columns. noclustered hint is ignored when sort.columns
# are specified.
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
01:EXCHANGE [HASH(year,month)]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: insert into tables with sort.columns property adds sort node.
insert into table test_sort_by.t_nopart /*+ shuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds
# sort node.
insert into table test_sort_by.t_nopart /*+ noshuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: sort columns are correct when using an identity column permutation.
insert into table test_sort_by.t_nopart (id, int_col, bool_col) /*+ shuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: sort columns are correct when using a non-trivial column permutation.
insert into table test_sort_by.t_nopart (bool_col, id, int_col) /*+ shuffle */
select bool_col, id, int_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: sort columns are correct when using a partial column permutation.
insert into table test_sort_by.t_nopart (bool_col, id) /*+ shuffle */
select bool_col, id from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: bool_col ASC NULLS LAST
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: bool_col ASC NULLS LAST
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: no sort node is added when using a partial column permutation and none of
# the sort columns appear in the permutation.
insert into table test_sort_by.t_nopart (id) /*+ shuffle */
select id from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# IMPALA-4166: sort columns with a join
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select a.id, b.int_col, a.bool_col, b.year, a.month
from functional.alltypes a join functional.alltypes b on a.id = b.id order by b.string_col
limit 10
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
04:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
03:TOP-N [LIMIT=10]
| order by: string_col ASC
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypes b]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
07:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
|
06:MERGING-EXCHANGE [UNPARTITIONED]
| order by: string_col ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: string_col ASC
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
|
|--05:EXCHANGE [HASH(a.id)]
| |
| 00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
|
04:EXCHANGE [HASH(b.id)]
|
01:SCAN HDFS [functional.alltypes b]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
====
# IMPALA-4166: sort columns with a join and agg
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select a.id, max(b.int_col), min(a.bool_col), b.year, a.month
from functional.alltypes a join functional.alltypes b on a.id = b.id
group by a.id, b.year, a.month
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.year,a.month)]
| partitions=24
|
04:SORT
| order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST
|
03:AGGREGATE [FINALIZE]
| output: max(b.int_col), min(a.bool_col)
| group by: a.id, b.year, a.month
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypes b]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.year,a.month)]
| partitions=24
|
09:SORT
| order by: b.year ASC NULLS LAST, a.month ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST
|
08:EXCHANGE [HASH(b.year,a.month)]
|
07:AGGREGATE [FINALIZE]
| output: max:merge(b.int_col), min:merge(a.bool_col)
| group by: a.id, b.year, a.month
|
06:EXCHANGE [HASH(a.id,b.year,a.month)]
|
03:AGGREGATE [STREAMING]
| output: max(b.int_col), min(a.bool_col)
| group by: a.id, b.year, a.month
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
|
|--05:EXCHANGE [HASH(a.id)]
| |
| 00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
|
04:EXCHANGE [HASH(b.id)]
|
01:SCAN HDFS [functional.alltypes b]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
====
# IMPALA-5339: Sort columns with a union to trigger expr rewrite
insert into table test_sort_by.t_nopart
select 0, cast(id as int), false from (select 1 as id union select cast(2 as int) as id) sub
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: CAST(id AS INT) ASC NULLS LAST
|
01:AGGREGATE [FINALIZE]
| group by: id
|
00:UNION
constant-operands=2
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: CAST(id AS INT) ASC NULLS LAST
|
01:AGGREGATE [FINALIZE]
| group by: id
|
00:UNION
constant-operands=2
====
# IMPALA-5339: Sort columns with a subquery to trigger expr rewrite
insert into table test_sort_by.t_nopart
select 0, id, false from test_sort_by.t_nopart where id = (select min(id) from test_sort_by.t)
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
04:SORT
| order by: id ASC NULLS LAST
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = min(id)
| runtime filters: RF000 <- min(id)
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| |
| 01:SCAN HDFS [test_sort_by.t]
| partitions=0/0 files=0 size=0B
|
00:SCAN HDFS [test_sort_by.t_nopart]
partitions=1/0 files=0 size=0B
runtime filters: RF000 -> id
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
07:SORT
| order by: id ASC NULLS LAST
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: id = min(id)
| runtime filters: RF000 <- min(id)
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(id)
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(id)
| |
| 01:SCAN HDFS [test_sort_by.t]
| partitions=0/0 files=0 size=0B
|
00:SCAN HDFS [test_sort_by.t_nopart]
partitions=1/0 files=0 size=0B
runtime filters: RF000 -> id
====