blob: 33e0b7a4fa99b123c3dba36f7c23d6c68c328c62 [file] [log] [blame]
--! qt:dataset:alltypesorc
set hive.fetch.task.conversion=none;
set hive.vectorized.use.row.serde.deserialize=true;
set hive.vectorized.use.vector.serde.deserialize=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;
-- SORT_QUERY_RESULTS
create table if not exists alltypes_parquet_n0 (
cint int,
ctinyint tinyint,
csmallint smallint,
cfloat float,
cdouble double,
cstring1 string) stored as parquet;
insert overwrite table alltypes_parquet_n0
select cint,
ctinyint,
csmallint,
cfloat,
cdouble,
cstring1
from alltypesorc;
-- test native fileinputformat vectorization
explain vectorization select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
explain vectorization select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
-- exclude MapredParquetInputFormat from vectorization, this should cause mapwork vectorization to be disabled
set hive.vectorized.input.format.excludes=org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
explain vectorization select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
explain vectorization select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
-- unset hive.vectorized.input.format.excludes and confirm if vectorizer vectorizes the mapwork
set hive.vectorized.input.format.excludes=;
explain vectorization select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
select *
from alltypes_parquet_n0
where cint = 528534767
limit 10;
explain vectorization select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_parquet_n0
group by ctinyint;
-- check if hive.vectorized.input.format.excludes work with non-parquet inputformats
set hive.vectorized.input.format.excludes=org.apache.hadoop.hive.ql.io.orc.OrcInputFormat,org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
set hive.vectorized.use.row.serde.deserialize=false;
set hive.vectorized.use.vector.serde.deserialize=false;
create table if not exists alltypes_orc_n2 (
cint int,
ctinyint tinyint,
csmallint smallint,
cfloat float,
cdouble double,
cstring1 string) stored as orc;
insert overwrite table alltypes_orc_n2
select cint,
ctinyint,
csmallint,
cfloat,
cdouble,
cstring1
from alltypesorc;
explain vectorization select *
from alltypes_orc_n2
where cint = 528534767
limit 10;
select *
from alltypes_orc_n2
where cint = 528534767
limit 10;
explain vectorization select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_orc_n2
group by ctinyint;
select ctinyint,
max(cint),
min(csmallint),
count(cstring1),
avg(cfloat),
stddev_pop(cdouble)
from alltypes_orc_n2
group by ctinyint;
-- test when input format is excluded row serde is used for vectorization
set hive.vectorized.input.format.excludes=org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat,org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
set hive.vectorized.use.vectorized.input.format=true;
set hive.vectorized.use.row.serde.deserialize=true;
set hive.vectorized.row.serde.inputformat.excludes=;
create table orcTbl (t1 tinyint, t2 tinyint)
stored as orc;
insert into orcTbl values (54, 9), (-104, 25), (-112, 24);
explain vectorization select t1, t2, (t1+t2) from orcTbl where (t1+t2) > 10;
select t1, t2, (t1+t2) from orcTbl where (t1+t2) > 10;
create table parquetTbl (t1 tinyint, t2 tinyint)
stored as parquet;
insert into parquetTbl values (54, 9), (-104, 25), (-112, 24);
explain vectorization SELECT t1, t2, (t1 + t2) FROM parquetTbl WHERE (t1 + t2) > 10;
SELECT t1, t2, (t1 + t2) FROM parquetTbl WHERE (t1 + t2) > 10;