| --! qt:dataset:src1 |
| --! qt:dataset:src |
| set hive.mapred.mode=nonstrict; |
| set hive.auto.convert.join=false; |
| set hive.optimize.correlation=false; |
| |
| -- SORT_QUERY_RESULTS |
| |
| -- When Correlation Optimizer is turned off, 6 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery xx, subquery yy, and xx join yy. |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| set hive.auto.convert.join=true; |
| -- Enable hive.auto.convert.join. |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.auto.convert.join=false; |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 3 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery yy and xx join yy. |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 4 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery yy and xx join yy. |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key; |
| |
| set hive.auto.convert.join=false; |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 4 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery xx and xx join yy. |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN src yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN src yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN src yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN src yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 4 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery xx and xx join yy join zz. |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN src zz ON xx.key=zz.key |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON zz.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN src zz ON xx.key=zz.key |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON zz.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| -- When Correlation Optimizer is turned off, 4 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery yy and xx join yy join zz. |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN src zz ON xx.key=zz.key |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON zz.key=yy.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN src zz ON xx.key=zz.key |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON zz.key=yy.key; |
| |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 4 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery yy and xx join yy join zz. |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key JOIN src zz |
| ON yy.key=zz.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key JOIN src zz |
| ON yy.key=zz.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key JOIN src zz |
| ON yy.key=zz.key; |
| |
| SELECT xx.key, yy.key, yy.cnt |
| FROM src1 xx |
| JOIN |
| (SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy |
| ON xx.key=yy.key JOIN src zz |
| ON yy.key=zz.key; |
| |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 6 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 2 MR jobs are needed. |
| -- The first job will evaluate subquery tmp and tmp join z. |
| EXPLAIN |
| SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value |
| FROM |
| (SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 |
| FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx |
| JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy |
| ON (xx.key=yy.key) GROUP BY xx.key) tmp |
| JOIN src z ON tmp.key=z.key; |
| |
| SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value |
| FROM |
| (SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 |
| FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx |
| JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy |
| ON (xx.key=yy.key) GROUP BY xx.key) tmp |
| JOIN src z ON tmp.key=z.key; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value |
| FROM |
| (SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 |
| FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx |
| JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy |
| ON (xx.key=yy.key) GROUP BY xx.key) tmp |
| JOIN src z ON tmp.key=z.key; |
| |
| SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value |
| FROM |
| (SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 |
| FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx |
| JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy |
| ON (xx.key=yy.key) GROUP BY xx.key) tmp |
| JOIN src z ON tmp.key=z.key; |
| |
| set hive.optimize.correlation=false; |
| -- When Correlation Optimizer is turned off, 6 MR jobs are needed. |
| -- When Correlation Optimizer is turned on, 4 MR jobs are needed. |
| -- 2 MR jobs are used to evaluate yy, 1 MR is used to evaluate xx and xx join yy. |
| -- The last MR is used for ordering. |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; |
| |
| set hive.optimize.correlation=true; |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key; |
| |
| set hive.optimize.correlation=true; |
| set hive.auto.convert.join=true; |
| EXPLAIN |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key; |
| |
| SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt |
| FROM |
| (SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx |
| JOIN |
| (SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy |
| ON xx.key=yy.key; |
| |