blob: 9f6997e641fa0a9de1c9a8de4cd38891a7753bc5 [file] [log] [blame]
--! qt:dataset:src1
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.auto.convert.join=false;
-- Tests in this file are used to make sure Correlation Optimizer
-- can correctly handle tables with partitions
CREATE TABLE part_table_n1(key string, value string) PARTITIONED BY (partitionId int);
INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=1)
SELECT key, value FROM src ORDER BY key, value LIMIT 100;
INSERT OVERWRITE TABLE part_table_n1 PARTITION (partitionId=2)
SELECT key, value FROM src1 ORDER BY key, value;
set hive.optimize.correlation=false;
-- In this case, we should not do shared scan on part_table_n1
-- because left and right tables of JOIN use different partitions
-- of part_table_n1. With Correlation Optimizer we will generate
-- 1 MR job.
EXPLAIN
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 1 AND
y.partitionId = 2
GROUP BY x.key;
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 1 AND
y.partitionId = 2
GROUP BY x.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 1 AND
y.partitionId = 2
GROUP BY x.key;
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 1 AND
y.partitionId = 2
GROUP BY x.key;
set hive.optimize.correlation=false;
-- In this case, we should do shared scan on part_table_n1
-- because left and right tables of JOIN use the same partition
-- of part_table_n1. With Correlation Optimizer we will generate
-- 1 MR job.
EXPLAIN
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 2 AND
y.partitionId = 2
GROUP BY x.key;
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 2 AND
y.partitionId = 2
GROUP BY x.key;
set hive.optimize.correlation=true;
EXPLAIN
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 2 AND
y.partitionId = 2
GROUP BY x.key;
SELECT x.key AS key, count(1) AS cnt
FROM part_table_n1 x JOIN part_table_n1 y ON (x.key = y.key)
WHERE x.partitionId = 2 AND
y.partitionId = 2
GROUP BY x.key;