blob: 705952dc20d80b61a23d67418dbd240eb8a7b722 [file] [log] [blame]
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.fetch.task.conversion=none;
-- Check if vectorization code is handling partitioning on DATE and the other data types.
CREATE TABLE flights_tiny_n1 (
origin_city_name STRING,
dest_city_name STRING,
fl_date DATE,
arr_delay FLOAT,
fl_num INT
);
LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE flights_tiny_n1;
CREATE TABLE flights_tiny_orc STORED AS ORC AS
SELECT origin_city_name, dest_city_name, fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') as fl_time, arr_delay, fl_num
FROM flights_tiny_n1;
-- SORT_QUERY_RESULTS
SELECT * FROM flights_tiny_orc;
SET hive.vectorized.execution.enabled=false;
select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
select fl_date, count(*) from flights_tiny_orc group by fl_date;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
explain vectorization expression
select fl_date, count(*) from flights_tiny_orc group by fl_date;
select fl_date, count(*) from flights_tiny_orc group by fl_date;
SET hive.vectorized.execution.enabled=false;
CREATE TABLE flights_tiny_orc_partitioned_date (
origin_city_name STRING,
dest_city_name STRING,
fl_time TIMESTAMP,
arr_delay FLOAT,
fl_num INT
)
PARTITIONED BY (fl_date DATE)
STORED AS ORC;
INSERT INTO TABLE flights_tiny_orc_partitioned_date
PARTITION (fl_date)
SELECT origin_city_name, dest_city_name, fl_time, arr_delay, fl_num, fl_date
FROM flights_tiny_orc;
select * from flights_tiny_orc_partitioned_date;
select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_orc_partitioned_date;
select * from flights_tiny_orc_partitioned_date;
explain vectorization expression
select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
explain vectorization expression
select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
SET hive.vectorized.execution.enabled=false;
CREATE TABLE flights_tiny_orc_partitioned_timestamp (
origin_city_name STRING,
dest_city_name STRING,
fl_date DATE,
arr_delay FLOAT,
fl_num INT
)
PARTITIONED BY (fl_time TIMESTAMP)
STORED AS ORC;
INSERT INTO TABLE flights_tiny_orc_partitioned_timestamp
PARTITION (fl_time)
SELECT origin_city_name, dest_city_name, fl_date, arr_delay, fl_num, fl_time
FROM flights_tiny_orc;
select * from flights_tiny_orc_partitioned_timestamp;
select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_orc_partitioned_timestamp;
select * from flights_tiny_orc_partitioned_timestamp;
explain vectorization expression
select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
explain vectorization expression
select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
-- test for Parquet file format
CREATE TABLE flights_tiny_parquet STORED AS PARQUET AS
SELECT origin_city_name, dest_city_name, fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') as fl_time, arr_delay, fl_num
FROM flights_tiny_n1;
SELECT * FROM flights_tiny_parquet;
SET hive.vectorized.execution.enabled=false;
select * from flights_tiny_parquet sort by fl_num, fl_date limit 25;
select fl_date, count(*) from flights_tiny_parquet group by fl_date;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_parquet sort by fl_num, fl_date limit 25;
select * from flights_tiny_parquet sort by fl_num, fl_date limit 25;
explain vectorization expression
select fl_date, count(*) from flights_tiny_parquet group by fl_date;
select fl_date, count(*) from flights_tiny_parquet group by fl_date;
SET hive.vectorized.execution.enabled=false;
CREATE TABLE flights_tiny_parquet_partitioned_date (
origin_city_name STRING,
dest_city_name STRING,
fl_time TIMESTAMP,
arr_delay FLOAT,
fl_num INT
)
PARTITIONED BY (fl_date DATE)
STORED AS PARQUET;
INSERT INTO TABLE flights_tiny_parquet_partitioned_date
PARTITION (fl_date)
SELECT origin_city_name, dest_city_name, fl_time, arr_delay, fl_num, fl_date
FROM flights_tiny_parquet;
select * from flights_tiny_parquet_partitioned_date;
select * from flights_tiny_parquet_partitioned_date sort by fl_num, fl_date limit 25;
select fl_date, count(*) from flights_tiny_parquet_partitioned_date group by fl_date;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_parquet_partitioned_date;
select * from flights_tiny_parquet_partitioned_date;
explain vectorization expression
select * from flights_tiny_parquet_partitioned_date sort by fl_num, fl_date limit 25;
select * from flights_tiny_parquet_partitioned_date sort by fl_num, fl_date limit 25;
explain vectorization expression
select fl_date, count(*) from flights_tiny_parquet_partitioned_date group by fl_date;
select fl_date, count(*) from flights_tiny_parquet_partitioned_date group by fl_date;
SET hive.vectorized.execution.enabled=false;
CREATE TABLE flights_tiny_parquet_partitioned_timestamp (
origin_city_name STRING,
dest_city_name STRING,
fl_date DATE,
arr_delay FLOAT,
fl_num INT
)
PARTITIONED BY (fl_time TIMESTAMP)
STORED AS PARQUET;
INSERT INTO TABLE flights_tiny_parquet_partitioned_timestamp
PARTITION (fl_time)
SELECT origin_city_name, dest_city_name, fl_date, arr_delay, fl_num, fl_time
FROM flights_tiny_parquet;
select * from flights_tiny_parquet_partitioned_timestamp;
select * from flights_tiny_parquet_partitioned_timestamp sort by fl_num, fl_time limit 25;
select fl_time, count(*) from flights_tiny_parquet_partitioned_timestamp group by fl_time;
SET hive.vectorized.execution.enabled=true;
explain vectorization expression
select * from flights_tiny_parquet_partitioned_timestamp;
select * from flights_tiny_parquet_partitioned_timestamp;
explain vectorization expression
select * from flights_tiny_parquet_partitioned_timestamp sort by fl_num, fl_time limit 25;
select * from flights_tiny_parquet_partitioned_timestamp sort by fl_num, fl_time limit 25;
explain vectorization expression
select fl_time, count(*) from flights_tiny_parquet_partitioned_timestamp group by fl_time;
select fl_time, count(*) from flights_tiny_parquet_partitioned_timestamp group by fl_time;