-- blob: a328ff9d3f2146c05f749393ac0c0b49d5d89451 [file] [log] [blame]
-- Session settings shared by every load-data scenario below.
set hive.stats.column.autogather=false;
set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
-- Allow the optimizer to auto-convert joins to map joins.
set hive.auto.convert.join=true;
set hive.join.emit.interval=2;
set hive.auto.convert.join.noconditionaltask=true;
-- Deliberately tiny threshold (presumably bytes -- confirm against HiveConf docs)
-- so conversion decisions are exercised with the small test tables.
set hive.auto.convert.join.noconditionaltask.size=10000;
-- Pick the big table for auto sort-merge joins by table size.
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- SORT_QUERY_RESULTS
-- Single partition
-- Regular load happens.
-- Full partition spec is given, so LOAD DATA is a plain file copy/move
-- (no rewrite job needed).
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
-- 'explain' first so the plan is captured in the golden output.
explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n8 partition(ds='2008-04-08');
-- Verify the rows landed in partition ds='2008-04-08'.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Triggers a Tez job as partition info is missing from load data.
-- No PARTITION clause on a partitioned table: Hive must rewrite the load
-- as a query that derives ds from the data file.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify rows were distributed into the derived partition(s).
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Multi partitions
-- Triggers a Tez job as partition info is missing from load data.
-- Same as the previous case but with two partition columns (ds, hr),
-- both of which must be derived during the rewritten load.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify rows landed across both partition columns.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Multi partitions and directory with files (no sub dirs)
-- The inpath is a directory; every file directly inside it is loaded.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
-- Verify all files in the directory were loaded.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Bucketing
-- Unpartitioned but clustered/sorted table: the load must redistribute
-- rows into 5 buckets, so a plain file move is not sufficient.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify the bucketed contents.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Single partition and bucketing
-- Combines the two previous rewrites: derive ds AND bucket by key.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify partitioned + bucketed placement.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Multiple partitions and bucketing
-- Two partition columns (ds, hr) plus 5-way bucketing on key.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify partitioned + bucketed placement.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Multiple partitions, bucketing, and directory with files (no sub dirs)
-- Directory input (flat) combined with partition derivation and bucketing.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin_n8;
-- Verify all directory files were loaded into the right partitions/buckets.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Multiple partitions, bucketing, and directory with files and sub dirs
-- Same as above but the inpath directory also contains subdirectories,
-- so recursive file discovery is exercised.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin_n8;
-- Verify files from subdirectories were picked up too.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Single partition, multiple buckets
-- Bucketing/sorting on two columns (key, value); ds is a regular column
-- here and only hr is a partition column.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string, ds string) partitioned by (hr int) clustered by (key, value) sorted by (key, value) into 5 buckets STORED AS TEXTFILE;
explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin_n8;
-- Verify multi-column bucket placement.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Load into ORC table using text files
-- INPUTFORMAT/SERDE override tells Hive how to read the source text file;
-- the load must then convert the rows into the table's ORC format.
CREATE TABLE srcbucket_mapjoin_n8(key int, value string) partitioned by (ds string) STORED AS ORC;
explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin_n8
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
-- Verify text rows were converted and stored as ORC.
select * from srcbucket_mapjoin_n8;
drop table srcbucket_mapjoin_n8;
-- Load into ACID table using ORC files
-- NOTE(review): hive.mapred.mode and hive.explain.user repeat the values
-- already set at the top of the file; kept for section self-containment.
set hive.mapred.mode=nonstrict;
set hive.optimize.ppd=true;
set hive.optimize.index.filter=true;
set hive.tez.bucket.pruning=true;
set hive.explain.user=false;
set hive.fetch.task.conversion=none;
-- Concurrency + DbTxnManager are required for transactional tables.
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
select * from orc_test_txn;
-- Test Load Overwrite.
-- OVERWRITE replaces existing data; count(*) after each overwrite should
-- stay constant instead of growing as it would with plain INTO.
load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;
select count(*) from orc_test_txn;
select * from orc_test_txn;
-- Second overwrite: confirms overwrite is idempotent on the same input.
load data local inpath '../../data/files/load_data_job_acid' OVERWRITE into table orc_test_txn;
select count(*) from orc_test_txn;
select * from orc_test_txn;