| set hive.mapred.mode=nonstrict; |
| set hive.stats.fetch.column.stats=true; |
| |
| create table if not exists loc_staging ( |
| state string, |
| locid int, |
| zip bigint, |
| year int |
| ) row format delimited fields terminated by '|' stored as textfile; |
| |
| create table loc_orc like loc_staging; |
| alter table loc_orc set fileformat orc; |
| |
| load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; |
| |
| insert overwrite table loc_orc select * from loc_staging; |
| |
| -- numRows: 8 rawDataSize: 796 |
| explain select * from loc_orc; |
| |
| -- column stats are not COMPLETE, so stats are not updated |
| -- numRows: 8 rawDataSize: 796 |
| explain select * from loc_orc where state='OH'; |
| |
| analyze table loc_orc compute statistics for columns state,locid,zip,year; |
| |
| -- state column has 5 distincts. numRows/countDistincts |
| -- numRows: 1 rawDataSize: 102 |
| explain select * from loc_orc where state='OH'; |
| |
| -- not equals comparison shouldn't affect number of rows |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where state!='OH'; |
| explain select * from loc_orc where state<>'OH'; |
| |
| -- nulls are treated as constant equality comparison |
| -- numRows: 1 rawDataSize: 102 |
| explain select * from loc_orc where zip is null; |
| -- numRows: 1 rawDataSize: 102 |
| explain select * from loc_orc where !(zip is not null); |
| |
| -- not nulls are treated as inverse of nulls |
| -- numRows: 7 rawDataSize: 702 |
| explain select * from loc_orc where zip is not null; |
| -- numRows: 7 rawDataSize: 702 |
| explain select * from loc_orc where !(zip is null); |
| |
| -- NOT evaluation. true will pass all rows, false will not pass any rows |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where !false; |
| -- numRows: 0 rawDataSize: 0 |
| explain select * from loc_orc where !true; |
| |
| -- Constant evaluation. true will pass all rows, false will not pass any rows |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where true; |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where 'foo'; |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where true = true; |
| -- numRows: 0 rawDataSize: 0 |
| explain select * from loc_orc where false = true; |
| -- numRows: 0 rawDataSize: 0 |
| explain select * from loc_orc where 'foo' = 'bar'; |
| -- numRows: 0 rawDataSize: 0 |
| explain select * from loc_orc where false; |
| |
| -- OR evaluation. 1 row for OH and 1 row for CA |
| -- numRows: 2 rawDataSize: 204 |
| explain select * from loc_orc where state='OH' or state='CA'; |
| |
| -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 |
| -- numRows: 2 rawDataSize: 204 |
| explain select * from loc_orc where year=2001 and year is null; |
| -- numRows: 1 rawDataSize: 102 |
| explain select * from loc_orc where year=2001 and state='OH' and state='FL'; |
| |
| -- AND and OR together. left expr will yield 1 row and right will yield 1 row |
| -- numRows: 3 rawDataSize: 306 |
| explain select * from loc_orc where (year=2001 and year is null) or (state='CA'); |
| |
| -- AND and OR together. left expr will yield 8 rows and right will yield 1 row |
| -- numRows: 1 rawDataSize: 102 |
| explain select * from loc_orc where (year=2001 or year is null) and (state='CA'); |
| |
| -- inequality conditions falling out of range. total or zero (converted to one) |
| -- numRows: 1 rawDataSize: 102 |
| -- numRows: 8 rawDataSize: 804 |
| explain select * from loc_orc where locid < 30; |
| explain select * from loc_orc where locid > 30; |
| explain select * from loc_orc where locid <= 30; |
| explain select * from loc_orc where locid >= 30; |
| |
| -- all inequality conditions falling within range. rows/3 is the rules |
| -- numRows: 2 rawDataSize: 204 |
| explain select * from loc_orc where locid < 3; |
| explain select * from loc_orc where locid > 3; |
| explain select * from loc_orc where locid <= 3; |
| explain select * from loc_orc where locid >= 3; |