blob: 5e3a1b84fbaafc0bb77be1ffa8111387cb9eb13c [file] [log] [blame]
--! qt:dataset:srcpart
--! qt:dataset:alltypesorc
SET hive.vectorized.execution.enabled=false;
set hive.explain.user=true;
set hive.compute.query.using.stats=false;
set hive.mapred.mode=nonstrict;
set hive.optimize.ppd=true;
set hive.ppd.remove.duplicatefilters=true;
set hive.tez.dynamic.partition.pruning=true;
set hive.tez.dynamic.semijoin.reduction=true;
set hive.optimize.metadataonly=false;
set hive.optimize.index.filter=true;
set hive.stats.autogather=true;
set hive.tez.bigtable.minsize.semijoin.reduction=1;
set hive.tez.min.bloom.filter.entries=1;
set hive.stats.fetch.column.stats=true;
set hive.llap.memory.oversubscription.max.executors.per.query=0;
-- Create Tables
create table alltypesorc_int_n2 ( cint int, cstring string ) stored as ORC;
create table srcpart_date_n9 (key string, value string) partitioned by (ds string ) stored as ORC;
CREATE TABLE srcpart_small_n4(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC;
-- Add Partitions
alter table srcpart_date_n9 add partition (ds = "2008-04-08");
alter table srcpart_date_n9 add partition (ds = "2008-04-09");
alter table srcpart_small_n4 add partition (ds = "2008-04-08");
alter table srcpart_small_n4 add partition (ds = "2008-04-09");
-- Load
insert overwrite table alltypesorc_int_n2 select cint, cstring1 from alltypesorc;
insert overwrite table srcpart_date_n9 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
insert overwrite table srcpart_date_n9 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
insert overwrite table srcpart_small_n4 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
set hive.tez.dynamic.semijoin.reduction=false;
analyze table alltypesorc_int_n2 compute statistics for columns;
analyze table srcpart_date_n9 compute statistics for columns;
analyze table srcpart_small_n4 compute statistics for columns;
-- single column, single key
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
set hive.tez.dynamic.semijoin.reduction=true;
-- Mix dynamic partition pruning(DPP) and min/max bloom filter optimizations. Should pick the DPP.
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.ds);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.ds);
set hive.tez.dynamic.semijoin.reduction=false;
--multiple sources, single key
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring);
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring);
set hive.tez.dynamic.semijoin.reduction=false;
-- single source, multiple keys
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1 and srcpart_date_n9.value = srcpart_small_n4.value1);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1 and srcpart_date_n9.value = srcpart_small_n4.value1);
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1 and srcpart_date_n9.value = srcpart_small_n4.value1);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1 and srcpart_date_n9.value = srcpart_small_n4.value1);
set hive.tez.dynamic.semijoin.reduction=false;
-- multiple sources, different keys
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
-- Explain extended to verify fast start for Reducer in semijoin branch
EXPLAIN extended select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
set hive.tez.dynamic.semijoin.reduction=false;
-- With Mapjoins.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=100000000000;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
set hive.tez.dynamic.semijoin.reduction=false;
-- multiple sources, different keys
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
set hive.tez.dynamic.semijoin.reduction=true;
EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring);
--set hive.tez.dynamic.semijoin.reduction=false;
-- With unions
explain select * from alltypesorc_int_n2 join
(select srcpart_date_n9.key as key from srcpart_date_n9
union all
select srcpart_small_n4.key1 as key from srcpart_small_n4) unionsrc on (alltypesorc_int_n2.cstring = unionsrc.key);
drop table srcpart_date_n9;
drop table srcpart_small_n4;
drop table alltypesorc_int_n2;