-- blob: 960e6e9cfd789af92a9fe2c1cd89cc33dba7b4da [file] [log] [blame]
-- Session configuration for the probe-decode map-join scenarios in this script.

-- Statistics, strictness, EXPLAIN format and fetch-task conversion.
SET hive.stats.column.autogather=false;
SET hive.mapred.mode=nonstrict;
SET hive.explain.user=false;
SET hive.fetch.task.conversion=none;

-- Automatic join conversion, without a conditional task, with a large size threshold.
SET hive.auto.convert.join=true;
SET hive.auto.convert.join.noconditionaltask=true;
SET hive.auto.convert.join.noconditionaltask.size=1000000000;

-- Vectorized execution with the native fast hash table for map joins.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;

-- Input split size bounds.
SET mapred.min.split.size=1000;
SET mapred.max.split.size=5000;

-- Feature under test: the scan probedecode optimization.
SET hive.optimize.scan.probedecode=true;
-- Scenario: join on DATE keys.
-- item_dim_dt is the two-row dimension table and orders_fact_dt the six-row fact table.
-- Both are stored as ORC.
CREATE TABLE item_dim_dt (key1 DATE, name string) stored as ORC;
CREATE TABLE orders_fact_dt (nokey int, key2 DATE, dt timestamp) stored as ORC;
INSERT INTO item_dim_dt values('2001-01-30', "Item 101");
INSERT INTO item_dim_dt values('2002-01-30', "Item 102");
-- Fact rows: only key2 values 2001-01-30 and 2002-01-30 exist in item_dim_dt.
-- Every dt literal must be a real calendar day (there is no February 30), otherwise
-- the stored value depends on how the engine treats invalid timestamp literals.
-- NOTE(review): rows are inserted one statement at a time, presumably on purpose. Confirm before merging them.
INSERT INTO orders_fact_dt values(12345, '2001-01-30', '2011-01-30 00:00:00');
INSERT INTO orders_fact_dt values(23456, '2004-01-30', '2014-02-28 00:00:00');
INSERT INTO orders_fact_dt values(34567, '2008-01-30', '2018-03-30 00:00:00');
INSERT INTO orders_fact_dt values(45678, '2002-01-30', '2012-04-30 00:00:00');
INSERT INTO orders_fact_dt values(56789, '2009-01-30', '2019-05-30 00:00:00');
INSERT INTO orders_fact_dt values(67891, '2010-01-30', '2020-06-30 00:00:00');
-- Reduce Sink Vectorization -> Expected className: VectorReduceSinkLongOperator
EXPLAIN VECTORIZATION DETAIL select key1, key2, name, dt from orders_fact_dt join item_dim_dt on (orders_fact_dt.key2 = item_dim_dt.key1);
-- two keys match, the remaining rows can be skipped
-- NOTE(review): the query has no ORDER BY, so the order of the two result rows is not pinned by the query itself.
select key1, key2, name, dt from orders_fact_dt join item_dim_dt on (orders_fact_dt.key2 = item_dim_dt.key1);
-- Scenario: join on TIMESTAMP keys.
-- item_dim_ts is the two-row dimension table and orders_fact_ts the six-row fact table.
-- Both are stored as ORC.
CREATE TABLE item_dim_ts (key1 timestamp, name string) stored as ORC;
CREATE TABLE orders_fact_ts (nokey int, key2 timestamp, dt timestamp) stored as ORC;
INSERT INTO item_dim_ts values('2001-01-30 00:00:00', "Item 101");
INSERT INTO item_dim_ts values('2002-01-30 00:00:00', "Item 102");
-- Fact rows: only key2 values 2001-01-30 00:00:00 and 2002-01-30 00:00:00 exist in item_dim_ts.
-- Every dt literal must be a real calendar day (there is no February 30), otherwise
-- the stored value depends on how the engine treats invalid timestamp literals.
-- NOTE(review): rows are inserted one statement at a time, presumably on purpose. Confirm before merging them.
INSERT INTO orders_fact_ts values(12345, '2001-01-30 00:00:00', '2011-01-30 00:00:00');
INSERT INTO orders_fact_ts values(23456, '2004-01-30 00:00:00', '2014-02-28 00:00:00');
INSERT INTO orders_fact_ts values(34567, '2008-01-30 00:00:00', '2018-03-30 00:00:00');
INSERT INTO orders_fact_ts values(45678, '2002-01-30 00:00:00', '2012-04-30 00:00:00');
INSERT INTO orders_fact_ts values(56789, '2009-01-30 00:00:00', '2019-05-30 00:00:00');
INSERT INTO orders_fact_ts values(67891, '2010-01-30 00:00:00', '2020-06-30 00:00:00');
-- Reduce Sink Vectorization -> Expected className: VectorReduceSinkMultiKeyOperator
EXPLAIN VECTORIZATION DETAIL select key1, key2, name, dt from orders_fact_ts join item_dim_ts on (orders_fact_ts.key2 = item_dim_ts.key1);
-- two keys match, the remaining rows can be skipped
-- NOTE(review): the query has no ORDER BY, so the order of the two result rows is not pinned by the query itself.
select key1, key2, name, dt from orders_fact_ts join item_dim_ts on (orders_fact_ts.key2 = item_dim_ts.key1);