ql/src/test/queries/clientpositive/load_data_using_job.q - hive - Git at Google

 set hive.stats.column.autogather=false;
 set hive.strict.checks.bucketing=false;

 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
 set hive.join.emit.interval=2;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=10000;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;

 -- SORT_QUERY_RESULTS

 -- Single partition
 -- Regular load happens.
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
 load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
 select * from srcbucket_mapjoin_n8;

 drop table srcbucket_mapjoin_n8;

 -- Triggers a Tez job as partition info is missing from load data.
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Multi partitions
 -- Triggers a Tez job as partition info is missing from load data.
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Multi partitions and directory with files (no sub dirs)
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Bucketing
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Single partition and bucketing
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Multiple partitions and bucketing
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Multiple partitions, bucketing, and directory with files (no sub dirs)
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Multiple partitions, bucketing, and directory with files and sub dirs
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Single partition, multiple buckets
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string, ds string) partitioned by (hr int) clustered by (key, value) sorted by (key, value) into 5 buckets STORED AS TEXTFILE;
 explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Load into ORC table using text files
 CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS ORC;
 explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
 INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
 SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
 load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
 INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
 SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
 select * from srcbucket_mapjoin_n8;
 drop table srcbucket_mapjoin_n8;

 -- Load into ACID table using ORC files
 set hive.mapred.mode=nonstrict;
 set hive.optimize.ppd=true;
 set hive.optimize.index.filter=true;
 set hive.tez.bucket.pruning=true;
 set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

 CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
 explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
 load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;

 select * from orc_test_txn;

 -- Test Load Overwrite.

 load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;

 select count(*) from orc_test_txn;
 select * from orc_test_txn;

 load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;

 select count(*) from orc_test_txn;
 select * from orc_test_txn;
	set hive.stats.column.autogather=false;
	set hive.strict.checks.bucketing=false;

	set hive.mapred.mode=nonstrict;
	set hive.explain.user=false;
	set hive.auto.convert.join=true;
	set hive.join.emit.interval=2;
	set hive.auto.convert.join.noconditionaltask=true;
	set hive.auto.convert.join.noconditionaltask.size=10000;
	set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;

	-- SORT_QUERY_RESULTS

	-- Single partition
	-- Regular load happens.
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
	load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
	select * from srcbucket_mapjoin_n8;

	drop table srcbucket_mapjoin_n8;

	-- Triggers a Tez job as partition info is missing from load data.
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Multi partitions
	-- Triggers a Tez job as partition info is missing from load data.
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Multi partitions and directory with files (no sub dirs)
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Bucketing
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Single partition and bucketing
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Multiple partitions and bucketing
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Multiple partitions, bucketing, and directory with files (no sub dirs)
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Multiple partitions, bucketing, and directory with files and sub dirs
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Single partition, multiple buckets
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string, ds string) partitioned by (hr int) clustered by (key, value) sorted by (key, value) into 5 buckets STORED AS TEXTFILE;
	explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Load into ORC table using text files
	CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS ORC;
	explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
	INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
	SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
	load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
	INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
	SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
	select * from srcbucket_mapjoin_n8;
	drop table srcbucket_mapjoin_n8;

	-- Load into ACID table using ORC files
	set hive.mapred.mode=nonstrict;
	set hive.optimize.ppd=true;
	set hive.optimize.index.filter=true;
	set hive.tez.bucket.pruning=true;
	set hive.explain.user=false;
	set hive.fetch.task.conversion=none;
	set hive.support.concurrency=true;
	set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

	CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
	explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
	load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;

	select * from orc_test_txn;

	-- Test Load Overwrite.

	load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;

	select count(*) from orc_test_txn;
	select * from orc_test_txn;

	load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;

	select count(*) from orc_test_txn;
	select * from orc_test_txn;