blob: 1da32c3b30bee6aa2199deb4dcd35aaf7336f2a5 [file] [log] [blame]
set hive.mapred.mode=nonstrict;
set hive.metastore.stats.ndv.densityfunction=true;
set hive.stats.fetch.column.stats=true;
set hive.exec.dynamic.partition=true;
set hive.metastore.aggregate.stats.cache.enabled=false;
drop table if exists ext_loc_n0;
create table ext_loc_n0 (
state string,
locid double,
cnt decimal,
zip int,
year string
) row format delimited fields terminated by '|' stored as textfile;
LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc_n0;
drop table if exists loc_orc_1d_n0;
create table loc_orc_1d_n0 (
state string,
locid double,
cnt decimal,
zip int
) partitioned by(year string) stored as orc;
insert overwrite table loc_orc_1d_n0 partition(year) select * from ext_loc_n0;
analyze table loc_orc_1d_n0 partition(year='2001') compute statistics for columns state,locid,cnt,zip;
analyze table loc_orc_1d_n0 partition(year='2002') compute statistics for columns state,locid,cnt,zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') zip;
explain extended select state,locid,cnt,zip from loc_orc_1d_n0;
analyze table loc_orc_1d_n0 partition(year='2000') compute statistics for columns state,locid,cnt,zip;
analyze table loc_orc_1d_n0 partition(year='2003') compute statistics for columns state,locid,cnt,zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') zip;
-- compute stats for all partitions together
analyze table loc_orc_1d_n0 partition(year) compute statistics for columns state,locid,cnt,zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2001') zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2002') zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') state;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') locid;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') cnt;
describe formatted loc_orc_1d_n0 PARTITION(year='2000') zip;
describe formatted loc_orc_1d_n0 PARTITION(year='2003') zip;
explain extended select state,locid,cnt,zip from loc_orc_1d_n0;
drop table if exists loc_orc_2d_n0;
create table loc_orc_2d_n0 (
state string,
locid int,
cnt decimal
) partitioned by(zip int, year string) stored as orc;
insert overwrite table loc_orc_2d_n0 partition(zip, year) select * from ext_loc_n0;
analyze table loc_orc_2d_n0 partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt;
analyze table loc_orc_2d_n0 partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') state;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') state;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') locid;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') locid;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') cnt;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') cnt;
-- compute stats for all partitions together
analyze table loc_orc_2d_n0 partition(zip, year) compute statistics for columns state,locid,cnt;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') state;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') state;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') locid;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') locid;
describe formatted loc_orc_2d_n0 partition(zip=94086, year='2001') cnt;
describe formatted loc_orc_2d_n0 partition(zip=94087, year='2002') cnt;
explain extended select state,locid,cnt,zip from loc_orc_2d_n0;