blob: f9c948bf8cc04436aa3a8c9b73071020f7c81ba2 [file] [log] [blame]
--! qt:dataset:src1
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.auto.convert.join=false;
set hive.optimize.correlation=false;
-- SORT_QUERY_RESULTS
-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery xx, subquery yy, and xx join yy.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
-- Enable hive.auto.convert.join.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
set hive.auto.convert.join=false;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 3 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery yy and xx join yy.
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
ON xx.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
ON xx.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
ON xx.key=yy.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery yy and xx join yy.
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key;
set hive.auto.convert.join=false;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery xx and xx join yy.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN src yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN src yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.cnt, yy.key
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN src yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN src yy
ON xx.key=yy.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery xx and xx join yy join zz.
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN src zz ON xx.key=zz.key
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON zz.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN src zz ON xx.key=zz.key
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON zz.key=yy.key;
set hive.optimize.correlation=true;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery yy and xx join yy join zz.
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN src zz ON xx.key=zz.key
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON zz.key=yy.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN src zz ON xx.key=zz.key
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON zz.key=yy.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery yy and xx join yy join zz.
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key JOIN src zz
ON yy.key=zz.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key JOIN src zz
ON yy.key=zz.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key JOIN src zz
ON yy.key=zz.key;
SELECT xx.key, yy.key, yy.cnt
FROM src1 xx
JOIN
(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
ON xx.key=yy.key JOIN src zz
ON yy.key=zz.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
-- The first job will evaluate subquery tmp and tmp join z.
EXPLAIN
SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
FROM
(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
ON (xx.key=yy.key) GROUP BY xx.key) tmp
JOIN src z ON tmp.key=z.key;
SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
FROM
(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
ON (xx.key=yy.key) GROUP BY xx.key) tmp
JOIN src z ON tmp.key=z.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
FROM
(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
ON (xx.key=yy.key) GROUP BY xx.key) tmp
JOIN src z ON tmp.key=z.key;
SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
FROM
(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
ON (xx.key=yy.key) GROUP BY xx.key) tmp
JOIN src z ON tmp.key=z.key;
set hive.optimize.correlation=false;
-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
-- When Correlation Optimizer is turned on, 4 MR jobs are needed.
-- 2 MR jobs are used to evaluate yy, 1 MR is used to evaluate xx and xx join yy.
-- The last MR is used for ordering.
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
set hive.optimize.correlation=true;
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key;
set hive.optimize.correlation=true;
set hive.auto.convert.join=true;
EXPLAIN
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key;
SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
FROM
(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
JOIN
(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
ON xx.key=yy.key;