| #!/usr/bin/env perl |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ############################################################################### |
| |
| # |
| # To get an outline of this test conf file, run |
| # egrep '^#|name.*=>' hcat.conf | egrep -v '^#!|egrep' | less |
| # (substitute the name of this file for hcat.conf if it differs). |
| # |
| |
| # Has a couple of Hive set directives (NOTE(review): none appear in the tests in this file; this header may be stale): |
| # set hive.exec.dynamic.partition.mode=nonstrict; |
| # set hive.exec.dynamic.partition=true; |
| |
| |
| $cfg = { |
| 'driver' => 'Hadoop', |
| 'groups' => [ |
| # Hadoop_Checkin group: tests 1-6 run jobs from testudf.jar against studenttab10k and studentparttab30k. TODO: should this first group be moved to the deployer? |
| { |
| 'name' => 'Hadoop_Checkin', |
| 'tests' => [ |
| { |
| 'num' => 1 |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH: |
| \, |
| ,'sql' => q\select name, age from studenttab10k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 2 |
| ,'hcat_prep'=>q\drop table if exists hadoop_checkin_2; |
| create table hadoop_checkin_2 (name string, age int, gpa double) STORED AS TEXTFILE;\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2 |
| \, |
| ,'result_table' => 'hadoop_checkin_2' |
| ,'sql' => q\select * from studenttab10k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 3 |
| ,'hcat_prep'=>q\drop table if exists hadoop_checkin_3; |
| create table hadoop_checkin_3 (age int, cnt int) STORED AS TEXTFILE;\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3 |
| \, |
| ,'result_table' => 'hadoop_checkin_3' |
| ,'sql' => q\select age, count(*) from studenttab10k group by age;\ |
| },{ |
| # Read from a partitioned table (studentparttab30k) |
| 'num' => 4 |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k :OUTPATH: |
| \, |
| ,'sql' => q\select name, age from studentparttab30k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| },{ |
| # Write a single partition (ds=20110924) to a partitioned table |
| 'num' => 5 |
| ,'hcat_prep'=>q\drop table if exists hadoop_checkin_5; |
| create table hadoop_checkin_5 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ |
| ,'hadoop' => q? |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_5 ds=\"20110924\" |
| ?, |
| ,'result_table' => 'hadoop_checkin_5' |
| ,'sql' => q\select name, age, ds from studentparttab30k where ds='20110924';\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, { |
| # Write multiple partitions to a partitioned table (no ds argument is passed to the job) |
| 'num' => 6 |
| ,'hcat_prep'=>q\drop table if exists hadoop_checkin_6; |
| create table hadoop_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_6 |
| \, |
| ,'result_table' => 'hadoop_checkin_6' |
| ,'sql' => q\select name, age, ds from studentparttab30k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| } |
| ], |
| }, # end of group Hadoop_Checkin |
| { |
| 'name' => 'Hadoop_Read', |
| 'tests' => [ |
| { |
| 'num' => 1 |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH: |
| \, |
| ,'sql' => q\select * from all100k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 2 |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH: |
| \, |
| ,'sql' => q\select s, i, d from all100kjson;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 3 |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH: |
| \, |
| ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| ], |
| }, # end of group Hadoop_Read (ReadText, ReadJson and ReadRC jobs over all100k, all100kjson and all100krc) |
| { |
| 'name' => 'Hadoop_Write', |
| 'tests' => [ |
| { |
| 'ignore' => 1, # Flagged ignore until HCATALOG-168 is checked in. |
| 'num' => 1 |
| ,'hcat_prep'=>q\ |
| drop table if exists hadoop_write_1; |
| create table hadoop_write_1( |
| t tinyint, |
| si smallint, |
| i int, |
| b bigint, |
| f float, |
| d double, |
| s string) |
| row format delimited |
| fields terminated by ':' |
| stored as textfile;\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1 |
| \, |
| ,'result_table' => 'hadoop_write_1' |
| ,'sql' => q\select * from all100k;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 2 |
| ,'hcat_prep' => q\ |
| drop table if exists hadoop_write_2; |
| create table hadoop_write_2( |
| s string, |
| i int, |
| d double, |
| m map<string, string>, |
| bb array<struct<a: int, b: string>>) |
| row format serde 'org.apache.hcatalog.data.JsonSerDe' |
| stored as textfile;\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2 |
| \, |
| ,'result_table' => 'hadoop_write_2' |
| ,'sql' => q\select s, i, d, '', '' from all100kjson;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| { |
| 'num' => 3 |
| ,'hcat_prep' => q\ |
| drop table if exists hadoop_write_3; |
| create table hadoop_write_3( |
| name string, |
| age int, |
| gpa double) |
| stored as rcfile; |
| \, |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3 |
| \, |
| ,'result_table' => 'hadoop_write_3' |
| ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| },{ |
| 'num' => 4 |
| ,'hcat_prep' => q\ |
| drop table if exists hadoop_write_4; |
| create table hadoop_write_4( |
| name string, |
| age int, |
| gpa double) |
| stored as sequencefile; |
| \, |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_4 |
| \, |
| ,'result_table' => 'hadoop_write_4' |
| ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| } |
| ], |
| }, # end of group Hadoop_Write (text, JSON, rcfile and sequencefile target tables). NOTE(review): test 4 runs WriteRC against a sequencefile table - confirm this is intended. |
| { |
| 'name' => 'Hadoop_HBase', |
| 'tests' => [ |
| { |
| 'num' => 1 |
| ,'hcat_prep'=>q\drop table if exists hadoop_hbase_1; |
| create table hadoop_hbase_1(key string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:gpa');\ |
| ,'hadoop' => q\ |
| jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.HBaseReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: :INPATH:/studenttab10k hadoop_hbase_1 :OUTPATH: |
| \, |
| ,'sql' => q\select name, sum(gpa) from studenttab10k group by name;\ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| }, |
| ], |
| }, # end of group Hadoop_HBase (HBaseReadWrite job run with table hadoop_hbase_1) |
| ] |
| } |