blob: d75a48f6c036a426d5788cd68aacd28f3b0f0b18 [file] [log] [blame]
set hive.stats.column.autogather=false;
-- Currently, a query with multiple FileSinkOperators are not supported.
set hive.mapred.mode=nonstrict;
CREATE TABLE T1_n19(key INT, val STRING);
LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n19;
CREATE TABLE T2_n11(key INT, val STRING);
LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T2_n11;
CREATE TABLE T3_n5(key INT, val STRING);
LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE T3_n5;
CREATE TABLE T4_n1(key INT, val STRING);
LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE T4_n1;
CREATE TABLE dest_co1(key INT, val STRING);
CREATE TABLE dest_co2(key INT, val STRING);
CREATE TABLE dest_co3(key INT, val STRING);
set hive.auto.convert.join=false;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 3 MR jobs are needed.
-- When Correlation Optimizer is turned on, only a single MR job is needed.
EXPLAIN
INSERT OVERWRITE TABLE dest_co1
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
INSERT OVERWRITE TABLE dest_co1
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
set hive.optimize.correlation=true;
EXPLAIN
INSERT OVERWRITE TABLE dest_co2
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
INSERT OVERWRITE TABLE dest_co2
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask.size=10000000000;
-- Enable hive.auto.convert.join.
EXPLAIN
INSERT OVERWRITE TABLE dest_co3
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
INSERT OVERWRITE TABLE dest_co3
SELECT b.key, d.val
FROM
(SELECT x.key, x.val FROM T1_n19 x JOIN T2_n11 y ON (x.key = y.key)) b
JOIN
(SELECT m.key, n.val FROM T3_n5 m JOIN T4_n1 n ON (m.key = n.key)) d
ON b.key = d.key;
-- dest_co1, dest_co2 and dest_co3 should be same
-- SELECT * FROM dest_co1 x ORDER BY x.key, x.val;
-- SELECT * FROM dest_co2 x ORDER BY x.key, x.val;
SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co1;
SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co2;
SELECT SUM(HASH(key)), SUM(HASH(val)) FROM dest_co3;