blob: d2e62dc23a7d1de2ab045e88343ff31af3b5f894 [file] [log] [blame]
--! qt:dataset:src
-- Start from a clean slate. "if exists" keeps these drops from failing when
-- the tables are absent, independent of hive.exec.drop.ignorenonexistent.
drop table if exists hbsort;
drop table if exists hbpartition;
-- Read inputs through HiveInputFormat for the rest of this script.
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- Staging table whose only purpose is to control how the HFiles are
-- produced: it is written through HiveHFileOutputFormat, with the
-- column-family directory given by the hfile.family.path property.
create table hbsort (
  key  string,
  val  string,
  val2 string
)
stored as
  inputformat  'org.apache.hadoop.mapred.TextInputFormat'
  outputformat 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
tblproperties (
  'hfile.family.path' = '/tmp/hbsort/cf'
);
-- Helper table that exists only to generate the input file read by
-- TotalOrderPartitioner; its data lands under /tmp/data/hbpartition.
create table hbpartition (
  part_break string
)
row format serde
  'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
stored as
  inputformat  'org.apache.hadoop.mapred.TextInputFormat'
  outputformat 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
location '/tmp/data/hbpartition';
-- Write the partition break values. A single output file is expected, but
-- its name is not known in advance, so it is copied afterwards to the
-- well-known path /tmp/hbpartition.lst.
insert overwrite table hbpartition
select distinct
  value
from src
where value in ('val_100', 'val_200');
dfs -count /tmp/data/hbpartition;
dfs -cp -f /tmp/data/hbpartition/* /tmp/hbpartition.lst;
-- Run the next job with three reducers and TotalOrderPartitioner, pointing
-- the partitioner at the break-value file /tmp/hbpartition.lst.
set mapred.reduce.tasks=3;
set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;
-- The natural-order flag and the partition file path are each set under two
-- property names.
-- NOTE(review): presumably the older and newer Hadoop spellings of the same
-- settings, kept so the script works on either -- confirm.
set total.order.partitioner.natural.order=false;
set total.order.partitioner.path=/tmp/hbpartition.lst;
set mapreduce.totalorderpartitioner.naturalorder=false;
set mapreduce.totalorderpartitioner.path=/tmp/hbpartition.lst;
-- Populate hbsort; three files are expected under /tmp/hbsort/cf.
-- The row for key 103 gets a null val and an empty-string val2 so that
-- trailing blanks and nulls are exercised.
-- NOTE(review): key is compared with a numeric literal and used in key + 1,
-- so this relies on implicit type conversion if src.key is a string -- confirm.
insert overwrite table hbsort
select distinct
  value,
  case
    when key = 103 then cast(null as string)
    else key
  end,
  case
    when key = 103 then ''
    else cast(key + 1 as string)
  end
from src
cluster by value;
dfs -count /tmp/hbsort/cf;
-- To get the files out to your local filesystem for loading into
-- HBase, run mkdir -p /tmp/blah/cf, then uncomment and
-- semicolon-terminate the line below before running this test:
-- dfs -copyToLocal /tmp/hbsort/cf/* /tmp/blah/cf
-- Clean up: drop both helper tables created by this script.
drop table hbsort;
drop table hbpartition;