blob: 010f530d96a6493053d1d63cff373542e8bc3599 [file] [log] [blame]
set hive.mapred.mode=nonstrict;
set hive.stats.fetch.column.stats=true;
set hive.stats.autogather=false;
set hive.exec.dynamic.partition=true;
set hive.metastore.aggregate.stats.cache.enabled=false;
create table if not exists loc_staging_n4 (
state string,
locid int,
zip bigint,
year string
) row format delimited fields terminated by '|' stored as textfile;
LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging_n4;
create table if not exists loc_orc_n4 (
state string,
locid int,
zip bigint
) partitioned by(year string) stored as orc;
-- basicStatState: NONE colStatState: NONE
explain select * from loc_orc_n4;
insert overwrite table loc_orc_n4 partition(year) select * from loc_staging_n4;
-- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
-- basicStatState: PARTIAL colStatState: NONE
explain select * from loc_orc_n4;
-- partition level analyze statistics for specific parition
analyze table loc_orc_n4 partition(year='2001') compute statistics;
-- basicStatState: PARTIAL colStatState: NONE
explain select * from loc_orc_n4 where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: PARTIAL colStatState: NONE
explain select * from loc_orc_n4;
-- basicStatState: COMPLETE colStatState: NONE
explain select * from loc_orc_n4 where year='2001';
-- partition level analyze statistics for all partitions
analyze table loc_orc_n4 partition(year) compute statistics;
-- basicStatState: COMPLETE colStatState: NONE
explain select * from loc_orc_n4 where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: COMPLETE colStatState: NONE
explain select * from loc_orc_n4;
-- basicStatState: COMPLETE colStatState: NONE
explain select * from loc_orc_n4 where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
-- both partitions will be pruned
-- basicStatState: NONE colStatState: NONE
explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
-- partition level partial column statistics
analyze table loc_orc_n4 partition(year='2001') compute statistics for columns state,locid;
-- basicStatState: COMPLETE colStatState: NONE
explain select zip from loc_orc_n4;
-- basicStatState: COMPLETE colStatState: PARTIAL
explain select state from loc_orc_n4;
-- basicStatState: COMPLETE colStatState: COMPLETE
explain select year from loc_orc_n4;
-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
-- basicStatState: COMPLETE colStatState: PARTIAL
explain select state,locid from loc_orc_n4;
-- basicStatState: COMPLETE colStatState: COMPLETE
explain select state,locid from loc_orc_n4 where year='2001';
-- basicStatState: COMPLETE colStatState: NONE
explain select state,locid from loc_orc_n4 where year!='2001';
-- basicStatState: COMPLETE colStatState: PARTIAL
explain select * from loc_orc_n4;
-- This is to test filter expression evaluation on partition column
-- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE
explain select locid from loc_orc_n4 where locid>0 and year='2001';
explain select locid,year from loc_orc_n4 where locid>0 and year='2001';
explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001';