blob: 5696682fd5c3f0ab214873bf2714ebab3ab0959c [file] [log] [blame]
--! qt:dataset:src1
--! qt:dataset:src
--! qt:dataset:part
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
;
set hive.exec.reducers.max = 1;
-- SORT_QUERY_RESULTS
CREATE TABLE tbl1_n10(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE tbl2_n9(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
insert overwrite table tbl1_n10
select * from src where key < 10;
insert overwrite table tbl2_n9
select * from src where key < 10;
set hive.auto.convert.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
--disable hash joins
set hive.auto.convert.join.noconditionaltask.size=1;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1;
select count(*) from (
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key;
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key;
-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
explain
select count(*) from
(
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key
) subq2;
select count(*) from
(
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key
) subq2;
-- A join is being performed across different sub-queries, where a join is being performed in each of them.
-- Each sub-query should be converted to a sort-merge join.
explain
select src1.key, src1.cnt1, src2.cnt1 from
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1 group by key
) src1
join
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq2 group by key
) src2
on src1.key = src2.key;
select src1.key, src1.cnt1, src2.cnt1 from
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1 group by key
) src1
join
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq2 group by key
) src2
on src1.key = src2.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join.
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key;
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join, although there is more than one level of sub-query
explain
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key;
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key;
-- Both the tables are nested sub-queries i.e more then 1 level of sub-query.
-- The join should be converted to a sort-merge join
explain
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq3
where key < 6
) subq4
on subq2.key = subq4.key;
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq3
where key < 6
) subq4
on subq2.key = subq4.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
-- item, but that is not part of the join key.
explain
select count(*) from
(select a.key as key, concat(a.value, a.value) as value from tbl1_n10 a where key < 8) subq1
join
(select a.key as key, concat(a.value, a.value) as value from tbl2_n9 a where key < 8) subq2
on subq1.key = subq2.key;
select count(*) from
(select a.key as key, concat(a.value, a.value) as value from tbl1_n10 a where key < 8) subq1
join
(select a.key as key, concat(a.value, a.value) as value from tbl2_n9 a where key < 8) subq2
on subq1.key = subq2.key;
-- Since the join key is modified by the sub-query, neither sort-merge join not bucketized mapside
-- join should be performed
explain
select count(*) from
(select a.key +1 as key, concat(a.value, a.value) as value from tbl1_n10 a) subq1
join
(select a.key +1 as key, concat(a.value, a.value) as value from tbl2_n9 a) subq2
on subq1.key = subq2.key;
select count(*) from
(select a.key +1 as key, concat(a.value, a.value) as value from tbl1_n10 a) subq1
join
(select a.key +1 as key, concat(a.value, a.value) as value from tbl2_n9 a) subq2
on subq1.key = subq2.key;
-- The left table is a sub-query and the right table is not.
-- It should be converted to a sort-merge join.
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join tbl2_n9 a on subq1.key = a.key;
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join tbl2_n9 a on subq1.key = a.key;
-- The right table is a sub-query and the left table is not.
-- It should be converted to a sort-merge join.
explain
select count(*) from tbl1_n10 a
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq1
on a.key = subq1.key;
select count(*) from tbl1_n10 a
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq1
on a.key = subq1.key;
-- There are more than 2 inputs to the join, all of them being sub-queries.
-- It should be converted to to a sort-merge join
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on (subq1.key = subq2.key)
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq3
on (subq1.key = subq3.key);
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq3
on (subq1.key = subq3.key);
-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
-- The join should be converted to a sort-merge join
explain
select count(*) from (
select subq2.key as key, subq2.value as value1, b.value as value2 from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key) a;
select count(*) from (
select subq2.key as key, subq2.value as value1, b.value as value2 from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key) a;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1;
select count(*) from (
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key;
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key;
-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
explain
select count(*) from
(
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key
) subq2;
select count(*) from
(
select key, count(*) from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1
group by key
) subq2;
-- A join is being performed across different sub-queries, where a join is being performed in each of them.
-- Each sub-query should be converted to a sort-merge join.
explain
select src1.key, src1.cnt1, src2.cnt1 from
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1 group by key
) src1
join
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq2 group by key
) src2
on src1.key = src2.key;
select src1.key, src1.cnt1, src2.cnt1 from
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq1 group by key
) src1
join
(
select key, count(*) as cnt1 from
(
select a.key as key, a.value as val1, b.value as val2 from tbl1_n10 a join tbl2_n9 b on a.key = b.key
) subq2 group by key
) src2
on src1.key = src2.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join.
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key;
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join, although there is more than one level of sub-query
explain
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key;
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key;
-- Both the tables are nested sub-queries i.e more then 1 level of sub-query.
-- The join should be converted to a sort-merge join
explain
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq3
where key < 6
) subq4
on subq2.key = subq4.key;
select count(*) from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq3
where key < 6
) subq4
on subq2.key = subq4.key;
-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key
-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one
-- item, but that is not part of the join key.
explain
select count(*) from
(select a.key as key, concat(a.value, a.value) as value from tbl1_n10 a where key < 8) subq1
join
(select a.key as key, concat(a.value, a.value) as value from tbl2_n9 a where key < 8) subq2
on subq1.key = subq2.key;
select count(*) from
(select a.key as key, concat(a.value, a.value) as value from tbl1_n10 a where key < 8) subq1
join
(select a.key as key, concat(a.value, a.value) as value from tbl2_n9 a where key < 8) subq2
on subq1.key = subq2.key;
-- The left table is a sub-query and the right table is not.
-- It should be converted to a sort-merge join.
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join tbl2_n9 a on subq1.key = a.key;
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join tbl2_n9 a on subq1.key = a.key;
-- The right table is a sub-query and the left table is not.
-- It should be converted to a sort-merge join.
explain
select count(*) from tbl1_n10 a
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq1
on a.key = subq1.key;
select count(*) from tbl1_n10 a
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq1
on a.key = subq1.key;
-- There are more than 2 inputs to the join, all of them being sub-queries.
-- It should be converted to to a sort-merge join
explain
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on (subq1.key = subq2.key)
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq3
on (subq1.key = subq3.key);
select count(*) from
(select a.key as key, a.value as value from tbl1_n10 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq2
on subq1.key = subq2.key
join
(select a.key as key, a.value as value from tbl2_n9 a where key < 6) subq3
on (subq1.key = subq3.key);
-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
-- The join should be converted to a sort-merge join
explain
select count(*) from (
select subq2.key as key, subq2.value as value1, b.value as value2 from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key) a;
select count(*) from (
select subq2.key as key, subq2.value as value1, b.value as value2 from
(
select * from
(
select a.key as key, a.value as value from tbl1_n10 a where key < 8
) subq1
where key < 6
) subq2
join tbl2_n9 b
on subq2.key = b.key) a;