blob: dea684dc7e44a7ca18753e4acf25b80db97e09d7 [file] [log] [blame]
--! qt:dataset:src
-- MASK_STATS
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-- This test verifies that the sort merge join optimizer works when the tables are sorted on columns which is a superset
-- of join columns
-- Create bucketed and sorted tables
CREATE TABLE test_table1_n4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS;
CREATE TABLE test_table2_n4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS;
FROM src
INSERT OVERWRITE TABLE test_table1_n4 SELECT *
INSERT OVERWRITE TABLE test_table2_n4 SELECT *;
-- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key ORDER BY a.key LIMIT 10;
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key ORDER BY a.key LIMIT 10;
DROP TABLE test_table1_n4;
DROP TABLE test_table2_n4;
-- Create bucketed and sorted tables
CREATE TABLE test_table1_n4 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS;
CREATE TABLE test_table2_n4 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS;
FROM src
INSERT OVERWRITE TABLE test_table1_n4 SELECT key, key, value
INSERT OVERWRITE TABLE test_table2_n4 SELECT key, key, value;
-- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10;
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10;
-- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
-- even if the order is not the same
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10;
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10;
-- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all
-- the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10;
SELECT /*+mapjoin(b)*/ * FROM test_table1_n4 a JOIN test_table2_n4 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10;
DROP TABLE test_table1_n4;
DROP TABLE test_table2_n4;