| --! qt:dataset:src |
| -- Setup: remove the two helper tables that this script creates further |
| -- down, so the create statements start from a clean state. |
| drop table hbsort; |
| drop table hbpartition; |
| |
| -- Read inputs through the plain HiveInputFormat for the rest of this script. |
| -- NOTE(review): presumably chosen so that input splits are not combined and |
| -- the file counts checked later stay predictable -- confirm. |
| set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; |
| |
| -- hbsort is a dummy table that exists only to control how the HFiles are |
| -- created: it declares HiveHFileOutputFormat as its output format, and its |
| -- 'hfile.family.path' property names /tmp/hbsort/cf, the directory whose |
| -- contents are counted after the table is populated. |
| -- Columns: key, val and val2, all strings. |
| create table hbsort(key string, val string, val2 string) |
| stored as |
| INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat' |
| TBLPROPERTIES ('hfile.family.path' = '/tmp/hbsort/cf'); |
| |
| -- hbpartition is a dummy table that exists only to control how the input |
| -- file for TotalOrderPartitioner is created. |
| -- Its single string column, part_break, is serialized with |
| -- BinarySortableSerDe and written through |
| -- HiveNullValueSequenceFileOutputFormat into /tmp/data/hbpartition. |
| create table hbpartition(part_break string) |
| row format serde |
| 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe' |
| stored as |
| inputformat |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| outputformat |
| 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat' |
| location '/tmp/data/hbpartition'; |
| |
| -- Populate hbpartition with the distinct src values that equal 'val_100' |
| -- or 'val_200'; each one becomes a part_break row. |
| -- This should produce one file, but we do not know what it will be |
| -- called, so it is copied afterwards to a well-known filename, |
| -- /tmp/hbpartition.lst. |
| insert overwrite table hbpartition |
| select distinct value |
| from src |
| where value='val_100' or value='val_200'; |
| |
| -- Print the directory/file/byte counts for the hbpartition location so the |
| -- "one file" expectation is visible in the test output. |
| dfs -count /tmp/data/hbpartition; |
| -- Copy whatever file(s) were written there to the fixed path |
| -- /tmp/hbpartition.lst; -f overwrites a copy left by a previous run. |
| dfs -cp -f /tmp/data/hbpartition/* /tmp/hbpartition.lst; |
| |
| -- Configure the next insert to run with three reducers and to route rows |
| -- with TotalOrderPartitioner, reading its partition file from |
| -- /tmp/hbpartition.lst with natural ordering turned off. |
| set mapred.reduce.tasks=3; |
| set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner; |
| -- The same two options are set under two different property prefixes. |
| -- NOTE(review): presumably the older and newer Hadoop names for the same |
| -- settings, kept together for compatibility -- confirm before removing either. |
| set total.order.partitioner.natural.order=false; |
| set total.order.partitioner.path=/tmp/hbpartition.lst; |
| set mapreduce.totalorderpartitioner.naturalorder=false; |
| set mapreduce.totalorderpartitioner.path=/tmp/hbpartition.lst; |
| |
| -- Populate hbsort from src; this should produce three files in |
| -- /tmp/hbsort/cf. |
| -- The select list maps positionally onto hbsort(key, val, val2): |
| --   key  <- src.value |
| --   val  <- src.key, except NULL for the row where key=103 |
| --   val2 <- key+1 cast to string, except '' for the row where key=103 |
| -- The NULL and the empty string are included on purpose, to make sure |
| -- blank and null cells are handled correctly. |
| -- Rows are clustered by value. |
| insert overwrite table hbsort |
| select distinct value, |
| case when key=103 then cast(null as string) else key end, |
| case when key=103 then '' |
| else cast(key+1 as string) end |
| from src |
| cluster by value; |
| |
| -- Print the directory/file/byte counts for the HFile family directory so |
| -- the number of files produced by the insert shows up in the test output. |
| dfs -count /tmp/hbsort/cf; |
| |
| -- To get the files out to your local filesystem for loading into |
| -- HBase, run mkdir -p /tmp/blah/cf, then uncomment and |
| -- semicolon-terminate the line below before running this test: |
| -- dfs -copyToLocal /tmp/hbsort/cf/* /tmp/blah/cf |
| |
| -- Cleanup: drop both helper tables created by this script. |
| drop table hbsort; |
| drop table hbpartition; |