blob: a352a77bc269dc97af928b5a94ab2abbbaa79f04 [file] [log] [blame]
set hive.mapred.mode=nonstrict;
set hive.stats.fetch.column.stats=true;
create table if not exists loc_staging (
state string,
locid int,
zip bigint,
year int
) row format delimited fields terminated by '|' stored as textfile;
create table loc_orc like loc_staging;
alter table loc_orc set fileformat orc;
load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging;
insert overwrite table loc_orc select * from loc_staging;
-- numRows: 8 rawDataSize: 796
explain select * from loc_orc;
-- column stats are not COMPLETE, so stats are not updated
-- numRows: 8 rawDataSize: 796
explain select * from loc_orc where state='OH';
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- state column has 5 distincts. numRows/countDistincts
-- numRows: 1 rawDataSize: 102
explain select * from loc_orc where state='OH';
-- not equals comparison shouldn't affect number of rows
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where state!='OH';
explain select * from loc_orc where state<>'OH';
-- nulls are treated as constant equality comparison
-- numRows: 1 rawDataSize: 102
explain select * from loc_orc where zip is null;
-- numRows: 1 rawDataSize: 102
explain select * from loc_orc where !(zip is not null);
-- not nulls are treated as inverse of nulls
-- numRows: 7 rawDataSize: 702
explain select * from loc_orc where zip is not null;
-- numRows: 7 rawDataSize: 702
explain select * from loc_orc where !(zip is null);
-- NOT evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where !false;
-- numRows: 0 rawDataSize: 0
explain select * from loc_orc where !true;
-- Constant evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where true;
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where 'foo';
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where true = true;
-- numRows: 0 rawDataSize: 0
explain select * from loc_orc where false = true;
-- numRows: 0 rawDataSize: 0
explain select * from loc_orc where 'foo' = 'bar';
-- numRows: 0 rawDataSize: 0
explain select * from loc_orc where false;
-- OR evaluation. 1 row for OH and 1 row for CA
-- numRows: 2 rawDataSize: 204
explain select * from loc_orc where state='OH' or state='CA';
-- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2
-- numRows: 2 rawDataSize: 204
explain select * from loc_orc where year=2001 and year is null;
-- numRows: 1 rawDataSize: 102
explain select * from loc_orc where year=2001 and state='OH' and state='FL';
-- AND and OR together. left expr will yield 1 row and right will yield 1 row
-- numRows: 3 rawDataSize: 306
explain select * from loc_orc where (year=2001 and year is null) or (state='CA');
-- AND and OR together. left expr will yield 8 rows and right will yield 1 row
-- numRows: 1 rawDataSize: 102
explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
-- inequality conditions falling out of range. total or zero (converted to one)
-- numRows: 1 rawDataSize: 102
-- numRows: 8 rawDataSize: 804
explain select * from loc_orc where locid < 30;
explain select * from loc_orc where locid > 30;
explain select * from loc_orc where locid <= 30;
explain select * from loc_orc where locid >= 30;
-- all inequality conditions falling within range. rows/3 is the rules
-- numRows: 2 rawDataSize: 204
explain select * from loc_orc where locid < 3;
explain select * from loc_orc where locid > 3;
explain select * from loc_orc where locid <= 3;
explain select * from loc_orc where locid >= 3;