blob: 74544458f0ffc70013361e0c49cd69e38ce39c6b [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-- Create bucketed and sorted tables
CREATE TABLE test_table1_n15 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table2_n14 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table3_n7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table4_n1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
INSERT OVERWRITE TABLE test_table1_n15
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table2_n14
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table3_n7
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table4_n1
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table5
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table6
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table7
SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table8
SELECT * FROM src WHERE key < 10;
set hive.auto.convert.sortmerge.join=false;
-- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables
EXPLAIN
SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*)
FROM test_table1_n15 a JOIN test_table2_n14 b ON a.key = b.key
JOIN test_table3_n7 c ON a.key = c.key
JOIN test_table4_n1 d ON a.key = d.key
JOIN test_table5 e ON a.key = e.key
JOIN test_table6 f ON a.key = f.key
JOIN test_table7 g ON a.key = g.key;
SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*)
FROM test_table1_n15 a JOIN test_table2_n14 b ON a.key = b.key
JOIN test_table3_n7 c ON a.key = c.key
JOIN test_table4_n1 d ON a.key = d.key
JOIN test_table5 e ON a.key = e.key
JOIN test_table6 f ON a.key = f.key
JOIN test_table7 g ON a.key = g.key;
set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
-- It should be automatically converted to a sort-merge join followed by a groupby in
-- a single MR job
EXPLAIN
SELECT count(*)
FROM test_table1_n15 a LEFT OUTER JOIN test_table2_n14 b ON a.key = b.key
LEFT OUTER JOIN test_table3_n7 c ON a.key = c.key
LEFT OUTER JOIN test_table4_n1 d ON a.key = d.key
LEFT OUTER JOIN test_table5 e ON a.key = e.key
LEFT OUTER JOIN test_table6 f ON a.key = f.key
LEFT OUTER JOIN test_table7 g ON a.key = g.key;
SELECT count(*)
FROM test_table1_n15 a LEFT OUTER JOIN test_table2_n14 b ON a.key = b.key
LEFT OUTER JOIN test_table3_n7 c ON a.key = c.key
LEFT OUTER JOIN test_table4_n1 d ON a.key = d.key
LEFT OUTER JOIN test_table5 e ON a.key = e.key
LEFT OUTER JOIN test_table6 f ON a.key = f.key
LEFT OUTER JOIN test_table7 g ON a.key = g.key;
EXPLAIN
SELECT count(*)
FROM test_table1_n15 a LEFT OUTER JOIN test_table2_n14 b ON a.key = b.key
LEFT OUTER JOIN test_table3_n7 c ON a.key = c.key
LEFT OUTER JOIN test_table4_n1 d ON a.key = d.key
LEFT OUTER JOIN test_table5 e ON a.key = e.key
LEFT OUTER JOIN test_table6 f ON a.key = f.key
LEFT OUTER JOIN test_table7 g ON a.key = g.key
LEFT OUTER JOIN test_table8 h ON a.key = h.key;
SELECT count(*)
FROM test_table1_n15 a LEFT OUTER JOIN test_table2_n14 b ON a.key = b.key
LEFT OUTER JOIN test_table3_n7 c ON a.key = c.key
LEFT OUTER JOIN test_table4_n1 d ON a.key = d.key
LEFT OUTER JOIN test_table5 e ON a.key = e.key
LEFT OUTER JOIN test_table6 f ON a.key = f.key
LEFT OUTER JOIN test_table7 g ON a.key = g.key
LEFT OUTER JOIN test_table8 h ON a.key = h.key;
-- outer join with max 16 aliases
EXPLAIN
SELECT a.*
FROM test_table1_n15 a
LEFT OUTER JOIN test_table2_n14 b ON a.key = b.key
LEFT OUTER JOIN test_table3_n7 c ON a.key = c.key
LEFT OUTER JOIN test_table4_n1 d ON a.key = d.key
LEFT OUTER JOIN test_table5 e ON a.key = e.key
LEFT OUTER JOIN test_table6 f ON a.key = f.key
LEFT OUTER JOIN test_table7 g ON a.key = g.key
LEFT OUTER JOIN test_table8 h ON a.key = h.key
LEFT OUTER JOIN test_table4_n1 i ON a.key = i.key
LEFT OUTER JOIN test_table5 j ON a.key = j.key
LEFT OUTER JOIN test_table6 k ON a.key = k.key
LEFT OUTER JOIN test_table7 l ON a.key = l.key
LEFT OUTER JOIN test_table8 m ON a.key = m.key
LEFT OUTER JOIN test_table7 n ON a.key = n.key
LEFT OUTER JOIN test_table8 o ON a.key = o.key
LEFT OUTER JOIN test_table4_n1 p ON a.key = p.key
LEFT OUTER JOIN test_table5 q ON a.key = q.key
LEFT OUTER JOIN test_table6 r ON a.key = r.key
LEFT OUTER JOIN test_table7 s ON a.key = s.key
LEFT OUTER JOIN test_table8 t ON a.key = t.key;