blob: 1e20c4b214d88d60c0fa9e489aacf034b9aa20ce [file] [log] [blame]
# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering
# columns are added to the sort columns.
insert into table test_sort_by.t partition(year, month) /*+ shuffle */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds
# sort node.
insert into table test_sort_by.t partition(year, month) /*+ noshuffle */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# IMPALA-4166: insert into tables with sort.columns property adds sort node. Clustering
# columns are added to the sort columns. noclustered hint is ignored when sort.columns
# are specified.
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select id, int_col, bool_col, year, month from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# IMPALA-4166: insert into tables with sort.columns property adds sort node.
insert into table test_sort_by.t_nopart /*+ shuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# IMPALA-4166: insert with noshuffle hint into tables with sort.columns property adds
# sort node.
insert into table test_sort_by.t_nopart /*+ noshuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# IMPALA-4166: sort columns are correct when using an identity column permutation.
insert into table test_sort_by.t_nopart (id, int_col, bool_col) /*+ shuffle */
select id, int_col, bool_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# IMPALA-4166: sort columns are correct when using a non-trivial column permutation.
insert into table test_sort_by.t_nopart (bool_col, id, int_col) /*+ shuffle */
select bool_col, id, int_col from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=9B cardinality=7.30K
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# IMPALA-4166: sort columns are correct when using a partial column permutation.
insert into table test_sort_by.t_nopart (bool_col, id) /*+ shuffle */
select bool_col, id from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:SORT
| order by: bool_col ASC NULLS LAST
| row-size=5B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=5B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: bool_col ASC NULLS LAST
| row-size=5B cardinality=7.30K
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=5B cardinality=7.30K
====
# IMPALA-4166: no sort node is added when using a partial column permutation and none of
# the sort columns appear in the permutation.
insert into table test_sort_by.t_nopart (id) /*+ shuffle */
select id from functional.alltypes
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# IMPALA-4166: sort columns with a join
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select a.id, b.int_col, a.bool_col, b.year, a.month
from functional.alltypes a join functional.alltypes b on a.id = b.id order by b.string_col
limit 10
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
04:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=10
|
03:TOP-N [LIMIT=10]
| order by: string_col ASC
| row-size=30B cardinality=10
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
| row-size=34B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=9B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
row-size=25B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
07:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST
| row-size=17B cardinality=10
|
06:MERGING-EXCHANGE [UNPARTITIONED]
| order by: string_col ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: string_col ASC
| row-size=30B cardinality=10
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
| row-size=34B cardinality=7.30K
|
|--05:EXCHANGE [HASH(a.id)]
| |
| 00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=9B cardinality=7.30K
|
04:EXCHANGE [HASH(b.id)]
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
row-size=25B cardinality=7.30K
====
# IMPALA-4166: sort columns with a join and agg
insert into table test_sort_by.t partition(year, month) /*+ noclustered */
select a.id, max(b.int_col), min(a.bool_col), b.year, a.month
from functional.alltypes a join functional.alltypes b on a.id = b.id
group by a.id, b.year, a.month
---- PLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.`year`,a.`month`)]
| partitions=24
|
04:SORT
| order by: b.`year` ASC NULLS LAST, a.`month` ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
03:AGGREGATE [FINALIZE]
| output: max(b.int_col), min(a.bool_col)
| group by: a.id, b.`year`, a.`month`
| row-size=17B cardinality=7.30K
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
| row-size=21B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=9B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
row-size=12B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t, OVERWRITE=false, PARTITION-KEYS=(b.`year`,a.`month`)]
| partitions=24
|
09:SORT
| order by: b.`year` ASC NULLS LAST, a.`month` ASC NULLS LAST, max(b.int_col) ASC NULLS LAST, min(a.bool_col) ASC NULLS LAST
| row-size=17B cardinality=7.30K
|
08:EXCHANGE [HASH(b.`year`,a.`month`)]
|
07:AGGREGATE [FINALIZE]
| output: max:merge(b.int_col), min:merge(a.bool_col)
| group by: a.id, b.`year`, a.`month`
| row-size=17B cardinality=7.30K
|
06:EXCHANGE [HASH(a.id,b.`year`,a.`month`)]
|
03:AGGREGATE [STREAMING]
| output: max(b.int_col), min(a.bool_col)
| group by: a.id, b.`year`, a.`month`
| row-size=17B cardinality=7.30K
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
| row-size=21B cardinality=7.30K
|
|--05:EXCHANGE [HASH(a.id)]
| |
| 00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=9B cardinality=7.30K
|
04:EXCHANGE [HASH(b.id)]
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
row-size=12B cardinality=7.30K
====
# IMPALA-5339: Sort columns with a union to trigger expr rewrite
insert into table test_sort_by.t_nopart
select 0, cast(id as int), false from (select 1 as id union select cast(2 as int) as id) sub
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: id ASC NULLS LAST
| row-size=1B cardinality=2
|
01:AGGREGATE [FINALIZE]
| group by: id
| row-size=1B cardinality=2
|
00:UNION
constant-operands=2
row-size=1B cardinality=2
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
02:SORT
| order by: id ASC NULLS LAST
| row-size=1B cardinality=2
|
01:AGGREGATE [FINALIZE]
| group by: id
| row-size=1B cardinality=2
|
00:UNION
constant-operands=2
row-size=1B cardinality=2
====
# IMPALA-5339: Sort columns with a subquery to trigger expr rewrite
insert into table test_sort_by.t_nopart
select 0, id, false from test_sort_by.t_nopart where id = (select min(id) from test_sort_by.t)
---- PLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
04:SORT
| order by: id ASC NULLS LAST
| row-size=4B cardinality=0
|
03:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: min(id) = id
| row-size=4B cardinality=0
|
|--00:SCAN HDFS [test_sort_by.t_nopart]
| HDFS partitions=1/0 files=0 size=0B
| row-size=4B cardinality=0
|
02:AGGREGATE [FINALIZE]
| output: min(id)
| row-size=4B cardinality=1
|
01:SCAN HDFS [test_sort_by.t]
partitions=0/0 files=0 size=0B
row-size=4B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [test_sort_by.t_nopart, OVERWRITE=false]
| partitions=1
|
08:SORT
| order by: id ASC NULLS LAST
| row-size=4B cardinality=0
|
03:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
| hash predicates: min(id) = id
| row-size=4B cardinality=0
|
|--07:EXCHANGE [HASH(id)]
| |
| 00:SCAN HDFS [test_sort_by.t_nopart]
| HDFS partitions=1/0 files=0 size=0B
| row-size=4B cardinality=0
|
06:EXCHANGE [HASH(min(id))]
|
05:AGGREGATE [FINALIZE]
| output: min:merge(id)
| row-size=4B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: min(id)
| row-size=4B cardinality=1
|
01:SCAN HDFS [test_sort_by.t]
partitions=0/0 files=0 size=0B
row-size=4B cardinality=0
====