blob: 4f703e0269334d14bf3c93b1a9bb298a84881f97 [file] [log] [blame]
--! qt:dataset:src1
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.auto.convert.join=false;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery xx and xx join yy.
-- This case is used to test LEFT SEMI JOIN since Hive will
-- introduce a GroupByOperator before the ReduceSinkOperator of
-- the right table (yy in queries below)
-- of LEFT SEMI JOIN.
-- SORT_AND_HASH_QUERY_RESULTS
EXPLAIN
SELECT xx.key, xx.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
LEFT SEMI JOIN src yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
LEFT SEMI JOIN src yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
LEFT SEMI JOIN src yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
LEFT SEMI JOIN src yy
ON xx.key=yy.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery xx and xx join yy.
-- This case is used to test LEFT SEMI JOIN since Hive will
-- introduce a GroupByOperator before the ReduceSinkOperator of
-- the right table (yy in queries below)
-- of LEFT SEMI JOIN.
EXPLAIN
SELECT xx.key, xx.value
FROM
src1 xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND
y.key > 20) yy
ON xx.key=yy.key;
SELECT xx.key, xx.value
FROM
src1 xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND
y.key > 20) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.value
FROM
src1 xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND
y.key > 20) yy
ON xx.key=yy.key;
SELECT xx.key, xx.value
FROM
src1 xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND
y.key > 20) yy
ON xx.key=yy.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- This test is used to test if we can use shared scan for
-- xx, yy:x, and yy:y.
EXPLAIN
SELECT xx.key, xx.value
FROM
src xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND x.key > 180) yy
ON xx.key=yy.key;
SELECT xx.key, xx.value
FROM
src xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND x.key > 180) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.value
FROM
src xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND x.key > 180) yy
ON xx.key=yy.key;
SELECT xx.key, xx.value
FROM
src xx
LEFT SEMI JOIN
(SELECT x.key as key
FROM src x JOIN src y ON (x.key = y.key)
WHERE x.key < 200 AND x.key > 180) yy
ON xx.key=yy.key;