blob: ac36e5a3f4b53fd627d3e260faa747f655ef59ab [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-- SORT_QUERY_RESULTS
-- This is to test the union->selectstar->filesink optimization
-- Union of 2 subqueries is performed (all of which are mapred queries)
-- followed by select star and a file sink in 2 output tables.
-- The optimiaztion does not take affect since it is a multi-table insert.
-- It does not matter, whether the output is merged or not. In this case,
-- merging is turned off
create table inputTbl1_n0(key string, val string) stored as textfile;
create table outputTbl1_n0(key string, `values` bigint) stored as textfile;
create table outputTbl2(key string, `values` bigint) stored as textfile;
load data local inpath '../../data/files/T1.txt' into table inputTbl1_n0;
explain
FROM (
select * from(
SELECT key, count(1) as `values` from inputTbl1_n0 group by key
UNION ALL
SELECT key, count(1) as `values` from inputTbl1_n0 group by key
)subq
) a
insert overwrite table outputTbl1_n0 select *
insert overwrite table outputTbl2 select *;
FROM (
select * from(
SELECT key, count(1) as `values` from inputTbl1_n0 group by key
UNION ALL
SELECT key, count(1) as `values` from inputTbl1_n0 group by key
)subq
) a
insert overwrite table outputTbl1_n0 select *
insert overwrite table outputTbl2 select *;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
select * from outputTbl1_n0;
select * from outputTbl2;
-- The following queries guarantee the correctness.
explain
select avg(c) from(
SELECT count(1)-200 as c from src
UNION ALL
SELECT count(1) as c from src
)subq;
select avg(c) from(
SELECT count(1)-200 as c from src
UNION ALL
SELECT count(1) as c from src
)subq;
explain
select key, avg(c) over w from(
SELECT key, count(1)*2 as c from src group by key
UNION ALL
SELECT key, count(1) as c from src group by key
)subq group by key, c
WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
select key, avg(c) over w from(
SELECT key, count(1)*2 as c from src group by key
UNION ALL
SELECT key, count(1) as c from src group by key
)subq group by key, c
WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);