blob: 28ef582dcd76fd8face3d6fe627f8f9dd3683e6b [file] [log] [blame]
--! qt:dataset:src
set hive.compute.query.using.stats=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.optimize.ppd=true;
set hive.ppd.remove.duplicatefilters=true;
set hive.tez.dynamic.partition.pruning=true;
set hive.tez.dynamic.semijoin.reduction=true;
set hive.optimize.metadataonly=false;
set hive.optimize.index.filter=true;
set hive.tez.bigtable.minsize.semijoin.reduction=1;
set hive.tez.min.bloom.filter.entries=1;
set hive.vectorized.adaptor.usage.mode=none;
set hive.vectorized.execution.enabled=true;
-- Create Tables
create table dsrv_big stored as orc as select key as key_str, key as key_str2, cast(key as int) as key_int, cast(key as int) as key_int2, value from src;
create table dsrv_small stored as orc as select distinct key as key_str, key as key_str2, cast(key as int) as key_int, cast(key as int) as key_int2, value from src where key < 100;
-- single key (int)
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int);
select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int);
-- single key (string)
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str);
select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str);
-- keys are different type
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str);
select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str);
-- multiple tables
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int;
select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int;
-- multiple keys
SELECT 'use MurmurHashStringColIntCol';
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int);
select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int);
SELECT 'use MurmurHashStringColIntCol, regardless of key ordering on join clause';
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int and a.key_str = b.key_str);
select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int and a.key_str = b.key_str);
SELECT 'use MurmurHashIntColIntCol';
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int and a.key_int2 = b.key_int2);
select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int and a.key_int2 = b.key_int2);
SELECT 'use MurmurHashStringColStringCol';
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_str2 = b.key_str2);
select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_str2 = b.key_str2);
-- small table result is empty
EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2');
select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2');
-- bloomfilter expectedEntries should use ndv if available. Compare to first query
analyze table dsrv_small compute statistics;
analyze table dsrv_small compute statistics for columns;
set hive.stats.fetch.column.stats=true;
EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int);
drop table dsrv_big;
drop table dsrv_small;