ql/src/test/queries/clientpositive/union_remove_6_subq.q - hive - Git at Google

 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
 set hive.stats.autogather=false;
 set hive.optimize.union.remove=true;

 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;

 -- SORT_QUERY_RESULTS

 -- This is to test the union->selectstar->filesink optimization
 -- Union of 2 subqueries is performed (all of which are mapred queries)
 -- followed by select star and a file sink in 2 output tables.
 -- The optimiaztion does not take affect since it is a multi-table insert.
 -- It does not matter, whether the output is merged or not. In this case,
 -- merging is turned off

 create table inputTbl1_n0(key string, val string) stored as textfile;
 create table outputTbl1_n0(key string, `values` bigint) stored as textfile;
 create table outputTbl2(key string, `values` bigint) stored as textfile;

 load data local inpath '../../data/files/T1.txt' into table inputTbl1_n0;

 explain
 FROM (
   select * from(
   SELECT key, count(1) as `values` from inputTbl1_n0 group by key
   UNION ALL
   SELECT key, count(1) as `values` from inputTbl1_n0 group by key
   )subq
 ) a
 insert overwrite table outputTbl1_n0 select *
 insert overwrite table outputTbl2 select *;

 FROM (
   select * from(
   SELECT key, count(1) as `values` from inputTbl1_n0 group by key
   UNION ALL
   SELECT key, count(1) as `values` from inputTbl1_n0 group by key
   )subq
 ) a
 insert overwrite table outputTbl1_n0 select *
 insert overwrite table outputTbl2 select *;

 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 select * from outputTbl1_n0;
 select * from outputTbl2;

 -- The following queries guarantee the correctness.
 explain
 select avg(c) from(
   SELECT count(1)-200 as c from src
   UNION ALL
   SELECT count(1) as c from src
 )subq;

 select avg(c) from(
   SELECT count(1)-200 as c from src
   UNION ALL
   SELECT count(1) as c from src
 )subq;

 explain
 select key, avg(c) over w from(
   SELECT key, count(1)*2 as c from src group by key
   UNION ALL
   SELECT key, count(1) as c from src group by key
 )subq group by key, c
 WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);

 select key, avg(c) over w from(
   SELECT key, count(1)*2 as c from src group by key
   UNION ALL
   SELECT key, count(1) as c from src group by key
 )subq group by key, c
 WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
	--! qt:dataset:src
	set hive.mapred.mode=nonstrict;
	set hive.stats.autogather=false;
	set hive.optimize.union.remove=true;

	set hive.merge.mapfiles=false;
	set hive.merge.mapredfiles=false;

	-- SORT_QUERY_RESULTS

	-- This is to test the union->selectstar->filesink optimization
	-- Union of 2 subqueries is performed (all of which are mapred queries)
	-- followed by select star and a file sink in 2 output tables.
	-- The optimiaztion does not take affect since it is a multi-table insert.
	-- It does not matter, whether the output is merged or not. In this case,
	-- merging is turned off

	create table inputTbl1_n0(key string, val string) stored as textfile;
	create table outputTbl1_n0(key string, `values` bigint) stored as textfile;
	create table outputTbl2(key string, `values` bigint) stored as textfile;

	load data local inpath '../../data/files/T1.txt' into table inputTbl1_n0;

	explain
	FROM (
	select * from(
	SELECT key, count(1) as `values` from inputTbl1_n0 group by key
	UNION ALL
	SELECT key, count(1) as `values` from inputTbl1_n0 group by key
	)subq
	) a
	insert overwrite table outputTbl1_n0 select *
	insert overwrite table outputTbl2 select *;

	FROM (
	select * from(
	SELECT key, count(1) as `values` from inputTbl1_n0 group by key
	UNION ALL
	SELECT key, count(1) as `values` from inputTbl1_n0 group by key
	)subq
	) a
	insert overwrite table outputTbl1_n0 select *
	insert overwrite table outputTbl2 select *;

	set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
	select * from outputTbl1_n0;
	select * from outputTbl2;

	-- The following queries guarantee the correctness.
	explain
	select avg(c) from(
	SELECT count(1)-200 as c from src
	UNION ALL
	SELECT count(1) as c from src
	)subq;

	select avg(c) from(
	SELECT count(1)-200 as c from src
	UNION ALL
	SELECT count(1) as c from src
	)subq;

	explain
	select key, avg(c) over w from(
	SELECT key, count(1)*2 as c from src group by key
	UNION ALL
	SELECT key, count(1) as c from src group by key
	)subq group by key, c
	WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);

	select key, avg(c) over w from(
	SELECT key, count(1)*2 as c from src group by key
	UNION ALL
	SELECT key, count(1) as c from src group by key
	)subq group by key, c
	WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);