blob: 3cbc3f231334cd213cbe2b3845d09f0c7b7c48af [file] [log] [blame]
set hive.mapred.mode=nonstrict;
set hive.auto.convert.join=true;
-- empty tables
create table studenttab10k (name string, age int, gpa double);
create table votertab10k (name string, age int, registration string, contributions float);
explain select s.name, count(distinct registration)
from studenttab10k s join votertab10k v
on (s.name = v.name)
group by s.name;
select s.name, count(distinct registration)
from studenttab10k s join votertab10k v
on (s.name = v.name)
group by s.name;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
set hive.auto.convert.sortmerge.join=true;
-- smb
create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets;
create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets;
explain select s.name, count(distinct registration)
from studenttab10k_smb s join votertab10k_smb v
on (s.name = v.name)
group by s.name;
select s.name, count(distinct registration)
from studenttab10k_smb s join votertab10k_smb v
on (s.name = v.name)
group by s.name;
load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_smb;
load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_smb;
load data local inpath '../../data/files/empty/000000_0' into table votertab10k_smb;
load data local inpath '../../data/files/empty/000001_0' into table votertab10k_smb;
explain select s.name, count(distinct registration)
from studenttab10k_smb s join votertab10k_smb v
on (s.name = v.name)
group by s.name;
select s.name, count(distinct registration)
from studenttab10k_smb s join votertab10k_smb v
on (s.name = v.name)
group by s.name;
-- smb + partitions
create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets;
load data local inpath '../../data/files/empty/000000_0' into table studenttab10k_part partition (p='foo');
load data local inpath '../../data/files/empty/000001_0' into table studenttab10k_part partition (p='foo');
load data local inpath '../../data/files/empty/000000_0' into table votertab10k_part partition (p='foo');
load data local inpath '../../data/files/empty/000001_0' into table votertab10k_part partition (p='foo');
explain select s.name, count(distinct registration)
from studenttab10k_part s join votertab10k_part v
on (s.name = v.name)
where s.p = 'bar'
and v.p = 'bar'
group by s.name;
select s.name, count(distinct registration)
from studenttab10k_part s join votertab10k_part v
on (s.name = v.name)
where s.p = 'bar'
and v.p = 'bar'
group by s.name;
drop table studenttab10k;
drop table votertab10k;
drop table studenttab10k_smb;
drop table votertab10k_smb;
drop table studenttab10k_part;
drop table votertab10k_part;