blob: 54a65995c5c9ce9435cf2184045dbad2c722b5dc [file] [log] [blame]
--! qt:dataset:part
SET hive.vectorized.execution.enabled=false;
set hive.strict.checks.bucketing=false;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
CREATE TABLE srcbucket_mapjoin_part_1_n2 (key INT, value STRING) PARTITIONED BY (part STRING)
CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='1');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='1');
ALTER TABLE srcbucket_mapjoin_part_1_n2 CLUSTERED BY (key) INTO 4 BUCKETS;
LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2');
CREATE TABLE srcbucket_mapjoin_part_2_n6 (key INT, value STRING) PARTITIONED BY (part STRING)
CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1');
ALTER TABLE srcbucket_mapjoin_part_2_n6 CLUSTERED BY (key) INTO 2 BUCKETS;
LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='2');
LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='2');
set hive.cbo.enable=false;
set hive.optimize.bucketmapjoin=true;
-- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is
-- a power of 2 and the bucketing columns match so bucket map join should be used
EXPLAIN EXTENDED
SELECT /*+ MAPJOIN(b) */ count(*)
FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b
ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
SELECT /*+ MAPJOIN(b) */ count(*)
FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b
ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
EXPLAIN EXTENDED
SELECT /*+ MAPJOIN(b) */ count(*)
FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b
ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL;
SELECT /*+ MAPJOIN(b) */ count(*)
FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b
ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL;