set hive.vectorized.execution.enabled=false;
set hive.compute.query.using.stats=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.exec.dynamic.partition=true;
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.optimize.sort.dynamic.partition.threshold=1;
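-- threshold=1 forces the sort dynamic partition optimization (SDPO) on;
-- -1 disables it and 0 leaves it as a cost-based decision, both of which are exercised later in this test.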
create table over1k_n3(
t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary)
row format delimited
fields terminated by '|';
load data local inpath '../../data/files/over1k' into table over1k_n3;
create table over1k_part(
si smallint,
i int,
b bigint,
f float)
partitioned by (ds string, t tinyint);
create table over1k_part_limit like over1k_part;
create table over1k_part_buck(
si smallint,
i int,
b bigint,
f float)
partitioned by (t tinyint)
clustered by (si) into 4 buckets;
create table over1k_part_buck_sort(
si smallint,
i int,
b bigint,
f float)
partitioned by (t tinyint)
clustered by (si)
sorted by (f) into 4 buckets;
-- map-only jobs converted to map-reduce jobs by the hive.optimize.sort.dynamic.partition optimization
explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
-- map-reduce jobs modified by hive.optimize.sort.dynamic.partition optimization
explain insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
desc formatted over1k_part partition(ds="foo",t=27);
desc formatted over1k_part partition(ds="foo",t="__HIVE_DEFAULT_PARTITION__");
desc formatted over1k_part_limit partition(ds="foo",t=27);
desc formatted over1k_part_limit partition(ds="foo",t="__HIVE_DEFAULT_PARTITION__");
desc formatted over1k_part_buck partition(t=27);
desc formatted over1k_part_buck partition(t="__HIVE_DEFAULT_PARTITION__");
desc formatted over1k_part_buck_sort partition(t=27);
desc formatted over1k_part_buck_sort partition(t="__HIVE_DEFAULT_PARTITION__");
select count(*) from over1k_part;
select count(*) from over1k_part_limit;
select count(*) from over1k_part_buck;
select count(*) from over1k_part_buck_sort;
-- tests for HIVE-6883
create table over1k_part2(
si smallint,
i int,
b bigint,
f float)
partitioned by (ds string, t tinyint);
set hive.optimize.sort.dynamic.partition.threshold=-1;
explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 order by i;
set hive.optimize.sort.dynamic.partition.threshold=1;
explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 order by i;
explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_n3 order by i limit 10) tmp where t is null or t=27;
set hive.optimize.sort.dynamic.partition.threshold=-1;
explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 group by si,i,b,f,t;
set hive.optimize.sort.dynamic.partition.threshold=1;
-- tests for HIVE-8162: only the partition column 't' should appear in the last RS operator
explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 group by si,i,b,f,t;
set hive.optimize.sort.dynamic.partition.threshold=-1;
insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 order by i;
desc formatted over1k_part2 partition(ds="foo",t=27);
desc formatted over1k_part2 partition(ds="foo",t="__HIVE_DEFAULT_PARTITION__");
-- SORT_BEFORE_DIFF
select * from over1k_part2;
select count(*) from over1k_part2;
set hive.optimize.sort.dynamic.partition.threshold=1;
insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 order by i;
desc formatted over1k_part2 partition(ds="foo",t=27);
desc formatted over1k_part2 partition(ds="foo",t="__HIVE_DEFAULT_PARTITION__");
-- SORT_BEFORE_DIFF
select * from over1k_part2;
select count(*) from over1k_part2;
-- hadoop-1 does not honor the number of reducers in local mode; there is always only 1 reducer irrespective of the number of buckets.
-- Hence all records go to one bucket and all other buckets are empty. Similar to HIVE-6867. However, hadoop-2 honors the number
-- of reducers and records are spread across all reducers. To avoid this inconsistency we set the number of buckets to 1 for this test.
create table over1k_part_buck_sort2(
si smallint,
i int,
b bigint,
f float)
partitioned by (t tinyint)
clustered by (si)
sorted by (f) into 1 buckets;
set hive.optimize.sort.dynamic.partition.threshold=-1;
explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
set hive.optimize.sort.dynamic.partition.threshold=1;
explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
set hive.optimize.sort.dynamic.partition.threshold=-1;
insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
desc formatted over1k_part_buck_sort2 partition(t=27);
desc formatted over1k_part_buck_sort2 partition(t="__HIVE_DEFAULT_PARTITION__");
select * from over1k_part_buck_sort2;
select count(*) from over1k_part_buck_sort2;
set hive.optimize.sort.dynamic.partition.threshold=1;
insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27;
desc formatted over1k_part_buck_sort2 partition(t=27);
desc formatted over1k_part_buck_sort2 partition(t="__HIVE_DEFAULT_PARTITION__");
select * from over1k_part_buck_sort2;
select count(*) from over1k_part_buck_sort2;
create table over1k_part3(
si smallint,
b bigint,
f float)
partitioned by (s string, t tinyint, i int);
set hive.optimize.sort.dynamic.partition.threshold=1;
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where s="foo";
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27;
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100;
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27;
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and s="foo";
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27 and s="foo";
explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27 and s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27 and s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27 and s="foo";
select sum(hash(*)) from over1k_part3;
-- cross-verify results with SDPO disabled
set hive.optimize.sort.dynamic.partition.threshold=-1;
drop table over1k_part3;
create table over1k_part3(
si smallint,
b bigint,
f float)
partitioned by (s string, t tinyint, i int);
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27;
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where t=27 and s="foo";
insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27 and s="foo";
select sum(hash(*)) from over1k_part3;
drop table over1k_n3;
create table over1k_n3(
t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary)
row format delimited
fields terminated by '|';
load data local inpath '../../data/files/over1k' into table over1k_n3;
analyze table over1k_n3 compute statistics for columns;
set hive.stats.fetch.column.stats=true;
-- default: Hive should make a cost-based decision and add an extra RS
set hive.optimize.sort.dynamic.partition.threshold=0;
explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27;
-- default, but should not add an extra RS
explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
-- disable
set hive.optimize.sort.dynamic.partition.threshold=-1;
explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27;
-- enable, will add extra RS
set hive.optimize.sort.dynamic.partition.threshold=1;
explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
create table over1k_part4_0(i int) partitioned by (s string);
create table over1k_part4_1(i int) partitioned by (s string);
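-- multi-insert from a single CTE scan into two dynamically partitioned tables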
EXPLAIN
WITH CTE AS (
select i, s from over1k_n3 where s like 'bob%'
)
FROM (
select * from CTE where i > 1 ORDER BY s
) src1k
insert overwrite table over1k_part4_0 partition(s)
select i+1, s
insert overwrite table over1k_part4_1 partition(s)
select i+0, s
;
WITH CTE AS (
select i, s from over1k_n3 where s like 'bob%'
)
FROM (
select * from CTE where i > 1 ORDER BY s
) src1k
insert overwrite table over1k_part4_0 partition(s)
select i+1, s
insert overwrite table over1k_part4_1 partition(s)
select i+0, s
;
select count(1) from over1k_part4_0;
select count(1) from over1k_part4_1;
drop table over1k_n3;