| #!/usr/bin/env perl |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ############################################################################### |
| |
| # |
| # To get an outline of this test conf file, run: |
| # egrep '^#|name.*=>' hcat.conf | egrep -v '^#!|egrep' | less |
| # |
| |
| # Has a couple of Hive set directives: |
| # set hive.exec.dynamic.partition.mode=nonstrict; |
| # set hive.exec.dynamic.partition=true; |
| |
| |
| $cfg = { |
| 'driver' => 'Pig', |
| 'groups' => [ |
| # TODO(review): should this first group be moved to the deployer? |
| { |
| 'name' => 'Pig_Checkin', |
| 'tests' => [ |
| |
| { |
| # Copy studenttab10k into a freshly created text table via HCatLoader/HCatStorer. |
| 'num' => 1, |
| 'hcat_prep' => q\drop table if exists pig_checkin_1; |
| create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'result_table' => 'pig_checkin_1', |
| 'sql' => q\select * from studenttab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Join two tables that are both read through HCatLoader. |
| 'num' => 2, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = load 'votertab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| c = join a by name, b by name; |
| store c into ':OUTPATH:';\, |
| 'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);'], |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Join an HCatLoader relation with a relation loaded directly from :INPATH:. |
| 'num' => 3, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float); |
| c = join a by name, b by name; |
| store c into ':OUTPATH:';\, |
| 'sql' => q\select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Split one load by age and store each half into its own table with HCatStorer. |
| 'num' => 4, |
| 'hcat_prep' => q\drop table if exists pig_checkin_4_1; |
| drop table if exists pig_checkin_4_2; |
| create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE; |
| create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| split a into b if age <=40, c if age > 40; |
| store b into 'pig_checkin_4_1' using org.apache.hcatalog.pig.HCatStorer(); |
| store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'result_table' => ['pig_checkin_4_1','pig_checkin_4_2'], |
| 'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;'], |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Same split as test 4, but one half goes to a table and the other to :OUTPATH:. |
| # NOTE(review): the '?' entry in result_table presumably stands for the |
| # :OUTPATH: store -- confirm against the test driver. |
| 'num' => 5, |
| 'hcat_prep' => q\drop table if exists pig_checkin_5; |
| create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| split a into b if age <=40, c if age > 40; |
| store b into 'pig_checkin_5' using org.apache.hcatalog.pig.HCatStorer(); |
| store c into ':OUTPATH:';\, |
| 'result_table' => ['pig_checkin_5','?'], |
| 'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;'], |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Store one filtered partition, naming the partition in the HCatStorer argument. |
| 'num' => 6, |
| 'hcat_prep' => q\drop table if exists pig_checkin_6; |
| create table pig_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = filter a by ds == '20110924'; |
| c = foreach b generate name, age; |
| store c into 'pig_checkin_6' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\, |
| # Leftover debugging statement, kept disabled: dump a; |
| 'result_table' => 'pig_checkin_6', |
| 'sql' => "select name, age, ds from studentparttab30k where ds='20110924';", |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Store into a partitioned table with no partition argument; ds travels in the data. |
| 'num' => 7, |
| 'hcat_prep' => q\drop table if exists pig_checkin_7; |
| create table pig_checkin_7 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age, ds; |
| store b into 'pig_checkin_7' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'result_table' => 'pig_checkin_7', |
| 'sql' => "select name, age, ds from studentparttab30k;", |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| |
| ], |
| }, # end of group Pig_Checkin |
| { |
| 'name' => 'Pig_Read', |
| 'tests' => [ |
| |
| { |
| # Read every column of all100k. Disabled until HCATALOG-168 is checked in. |
| 'ignore' => 1, |
| 'num' => 1, |
| 'pig' => q\a = load 'all100k' using org.apache.hcatalog.pig.HCatLoader(); |
| store a into ':OUTPATH:';\, |
| 'sql' => q\select * from all100k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Read three columns (s, i, d) from all100kjson. |
| 'num' => 2, |
| 'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate s, i, d; |
| store b into ':OUTPATH:';\, |
| 'sql' => q\select s, i, d from all100kjson;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Read two columns (name, age) from all100krc. |
| 'num' => 3, |
| 'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age; |
| store b into ':OUTPATH:';\, |
| 'sql' => q\select name, age from all100krc;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # A table with one partition in text and one in rc. |
| # The prep string is quoted with q?...? because it contains a backslash ('\t'). |
| 'num' => 4, |
| 'hcat_prep' => q?drop table if exists pig_read_4; |
| create external table pig_read_4 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile; |
| alter table pig_read_4 add partition (b='1') location '/user/hcat/tests/data/studenttab10k'; |
| alter table pig_read_4 set fileformat rcfile; |
| alter table pig_read_4 add partition (b='2') location '/user/hcat/tests/data/all100krc';?, |
| 'pig' => q\a = load 'pig_read_4' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age, b; |
| store b into ':OUTPATH:';\, |
| 'sql' => q\(select name, age, 1 from studenttab10k) |
| union all |
| (select name, age, 2 from all100krc);\, |
| }, |
| { |
| # Read from a table in the non-default database (pig_db_1). |
| 'num' => 5, |
| 'hcat_prep' => q?create database if not exists pig_db_1; |
| drop table if exists pig_db_1.pig_read_5; |
| create external table pig_db_1.pig_read_5 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile; |
| use pig_db_1; |
| alter table pig_read_5 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';?, |
| 'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age, b; |
| store b into ':OUTPATH:';\, |
| 'sql' => q\select name, age, 1 from studenttab10k;\, |
| }, |
| ], |
| }, # end of group Pig_Read |
| { |
| # Tests that write to HCatalog tables of different storage formats through HCatStorer. |
| 'name' => 'Pig_Write', |
| 'tests' => [ |
| { |
| # Load all100k from :INPATH: and store it into an rcfile table. |
| # Disabled until HCATALOG-168 is checked in. |
| 'ignore' => 1, |
| 'num' => 1, |
| 'hcat_prep' => q\drop table if exists pig_write_1; |
| create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\, |
| 'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray); |
| store a into 'pig_write_1' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'result_table' => 'pig_write_1', |
| 'sql' => q\select * from all100k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Store three columns into a text table that uses the JsonSerDe. |
| 'num' => 2, |
| 'hcat_prep' => q\drop table if exists pig_write_2; |
| create table pig_write_2( |
| s string, |
| i int, |
| d double, |
| m map<string, string>, |
| bb array<struct<a: int, b: string>>) |
| row format serde 'org.apache.hcatalog.data.JsonSerDe' |
| STORED AS TEXTFILE; |
| \, |
| 'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate s, i, d; |
| store b into 'pig_write_2' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\, |
| 'result_table' => 'pig_write_2', |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Store two columns into an rcfile table. |
| 'num' => 3, |
| 'hcat_prep' => q\drop table if exists pig_write_3; |
| create table pig_write_3( |
| name string, |
| age int, |
| gpa double) |
| stored as rcfile; |
| \, |
| 'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age; |
| store b into 'pig_write_3' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'sql' => q\select name, age from all100krc;\, |
| 'result_table' => 'pig_write_3', |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Store in a sequence file. |
| 'num' => 4, |
| 'hcat_prep' => q\drop table if exists pig_write_4; |
| create table pig_write_4( |
| name string, |
| age int, |
| gpa double) |
| stored as sequencefile; |
| \, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age, 0.1; |
| c = foreach b generate name, age, $2 as gpa; |
| store c into 'pig_write_4' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'sql' => q\select name, age, 0.1 from studenttab10k;\, |
| 'result_table' => 'pig_write_4', |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Write to a table in the non-default database (pig_db_1). |
| # The table is dropped and recreated, like every other test that creates |
| # its own table here, so rows left by an earlier run cannot leak into |
| # this run's result. |
| 'num' => 5, |
| 'hcat_prep' => q?create database if not exists pig_db_1; |
| drop table if exists pig_db_1.pig_write_5; |
| create table pig_db_1.pig_write_5 (name string, age int) row format delimited fields terminated by '\t' stored as textfile;?, |
| 'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name, age; |
| store b into 'pig_db_1.pig_write_5' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'sql' => q\select name, age from studenttab10k;\, |
| 'result_table' => 'pig_db_1.pig_write_5', |
| }, |
| ], |
| }, # end of group Pig_Write |
| { |
| # The three tests below form a chain: each one relies on the table state |
| # left behind by the previous one (see the depends_on entries). |
| 'name' => 'Pig_Change_Schema', |
| 'tests' => [ |
| { |
| # Step 1 of the chain: create the table with a single data column and |
| # fill partition ds=20110924. Not ideal, since this test is setup for the next. |
| 'num' => 1, |
| 'hcat_prep' => q\drop table if exists pig_change_schema_1; |
| create table pig_change_schema_1 (name string) partitioned by (ds string) STORED AS TEXTFILE;\, |
| 'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = filter a by ds == '20110924'; |
| c = foreach b generate name; |
| store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\, |
| 'result_table' => 'pig_change_schema_1', |
| 'sql' => q\select name, ds from studentparttab30k where ds='20110924';\, |
| }, |
| { |
| # Step 2 of the chain: add an age column to the same table, then fill |
| # partition ds=20110925 with both columns. |
| 'num' => 2, |
| 'depends_on' => 'Pig_Change_Schema_1', |
| 'hcat_prep' => q\alter table pig_change_schema_1 add columns (age int);\, |
| 'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = filter a by ds == '20110925'; |
| c = foreach b generate name, age; |
| store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110925');\, |
| 'result_table' => 'pig_change_schema_1', |
| 'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924') |
| union all |
| (select name, age, ds from studentparttab30k where ds = '20110925');\, |
| }, |
| { |
| # Step 3 of the chain: read the altered table back through HCatLoader. |
| 'num' => 3, |
| 'depends_on' => 'Pig_Change_Schema_2', |
| 'pig' => q\a = load 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatLoader(); |
| c = foreach a generate name, age, ds; |
| store c into ':OUTPATH:';\, |
| 'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924') |
| union all |
| (select name, age, ds from studentparttab30k where ds = '20110925');\, |
| }, |
| ], |
| }, # end of group Pig_Change_Schema |
| { |
| # Tests against tables declared with the HBaseHCatStorageHandler. |
| 'name' => 'Pig_HBase', |
| 'tests' => [ |
| { |
| # Store per-name averages into an HBase-backed table, then load the |
| # table back and store it to :OUTPATH: in the same script. |
| # NOTE(review): the '?' entry in result_table presumably stands for the |
| # :OUTPATH: store -- confirm against the test driver. |
| 'num' => 1, |
| 'hcat_prep' => q\drop table if exists pig_hbase_1; |
| create table pig_hbase_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');\, |
| 'pig' => q\set hcat.hbase.output.bulkMode 'false' |
| a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float); |
| b = group a by name; |
| c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa; |
| d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa; |
| store d into 'pig_hbase_1' using org.apache.hcatalog.pig.HCatStorer(); |
| exec |
| e = load 'pig_hbase_1' using org.apache.hcatalog.pig.HCatLoader(); |
| store e into ':OUTPATH:';\, |
| 'result_table' => ['pig_hbase_1','?'], |
| 'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;' ], |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Multiquery: one relation stored into two HBase-backed tables. |
| 'num' => 2, |
| 'hcat_prep' => q\drop table if exists pig_hbase_2_1; |
| create table pig_hbase_2_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); |
| drop table if exists pig_hbase_2_2; |
| create table pig_hbase_2_2(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); |
| \, |
| 'pig' => q\set hcat.hbase.output.bulkMode 'false' |
| a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float); |
| b = group a by name; |
| c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa; |
| d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa; |
| store d into 'pig_hbase_2_1' using org.apache.hcatalog.pig.HCatStorer(); |
| store d into 'pig_hbase_2_2' using org.apache.hcatalog.pig.HCatStorer();\, |
| 'result_table' => ['pig_hbase_2_1','pig_hbase_2_2'], |
| 'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;'], |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| ], |
| }, # end of group Pig_HBase |
| { |
| # Negative tests: each entry supplies an expected_err_regex instead of a |
| # verification query. |
| 'name' => 'Pig_HCat_Barrier', |
| 'tests' => [ |
| { |
| # Store into a table declared CLUSTERED BY ... INTO 1 BUCKETS. |
| 'num' => 1, |
| 'hcat_prep' => q\drop table if exists pig_hcat_barrier_1; |
| create table pig_hcat_barrier_1 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\, |
| 'pig' => q\ |
| a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double); |
| store a into 'pig_hcat_barrier_1' using org.apache.hcatalog.pig.HCatStorer('b=1'); \, |
| 'expected_err_regex' => 'not supported', |
| }, |
| { |
| # Same as test 1, with SORTED BY added to the table definition. |
| 'num' => 2, |
| 'hcat_prep' => q\drop table if exists pig_hcat_barrier_2; |
| create table pig_hcat_barrier_2 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) SORTED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\, |
| 'pig' => q\ |
| a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double); |
| store a into 'pig_hcat_barrier_2' using org.apache.hcatalog.pig.HCatStorer('b=1'); \, |
| 'expected_err_regex' => 'not supported', |
| }, |
| ], |
| }, # end of group Pig_HCat_Barrier |
| { |
| 'name' => 'Pig_HCAT_COOP', |
| 'tests' => [ |
| { |
| # Check that Pig can load the table after a series of table schema changes. |
| # The prep string is quoted with q:...: because it contains a backslash ('\t'). |
| 'num' => 1, |
| 'hcat_prep' => q:drop table if exists pig_hcat_coop_1; |
| create external table pig_hcat_coop_1 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE; |
| alter table pig_hcat_coop_1 add partition (b='1') location '/user/hcat/tests/data/studenttab10k'; |
| alter table pig_hcat_coop_1 partition(b='1') set fileformat TEXTFILE; |
| alter table pig_hcat_coop_1 change gpa registration string; |
| alter table pig_hcat_coop_1 add columns (contributions float); |
| alter table pig_hcat_coop_1 add partition (b='2') location '/user/hcat/tests/data/votertab10k'; |
| alter table pig_hcat_coop_1 partition(b='2') set fileformat TEXTFILE; |
| alter table pig_hcat_coop_1 replace columns (name string, age int); |
| :, |
| 'pig' => q\ |
| a = load 'pig_hcat_coop_1' using org.apache.hcatalog.pig.HCatLoader(); |
| store a into ':OUTPATH:';\, |
| 'sql' => q\select name, age, '1' from studenttab10k union all select name, age, '2' from votertab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Check that Pig can load the table after a file format change and a table schema change. |
| 'num' => 2, |
| 'hcat_prep' => q:drop table if exists pig_hcat_coop_2; |
| create external table pig_hcat_coop_2 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE; |
| alter table pig_hcat_coop_2 add partition (b='1') location '/user/hcat/tests/data/studenttab10k'; |
| alter table pig_hcat_coop_2 partition(b='1') set fileformat TEXTFILE; |
| alter table pig_hcat_coop_2 add partition (b='2') location '/user/hcat/tests/data/all100krc'; |
| alter table pig_hcat_coop_2 partition(b='2') set fileformat RCFILE; |
| alter table pig_hcat_coop_2 replace columns (age int, name string); |
| :, |
| 'pig' => q\ |
| a = load 'pig_hcat_coop_2' using org.apache.hcatalog.pig.HCatLoader(); |
| store a into ':OUTPATH:';\, |
| 'sql' => q\select age, name, '1' from studenttab10k union all select age, name, '2' from all100krc;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| ], |
| }, # end of group Pig_HCAT_COOP |
| { |
| 'name' => 'Pig_Complex', |
| 'tests' => [ |
| { |
| # Read tuples from the complex table. |
| 'num' => 1, |
| 'pig' => q\ |
| a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate flatten(s); |
| store b into ':OUTPATH:';\, |
| 'sql' => q\select IFNULL(name, ""), IFNULL(age, ""), IFNULL(gpa, "") from studentcomplextab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Read maps from the complex table. |
| 'num' => 2, |
| 'pig' => q\ |
| a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate s.name as n1, m#'name' as n2; |
| c = filter b by n1 != '' and n2 is not null; |
| store c into ':OUTPATH:';\, |
| 'sql' => q\select t.name, m.mvalue |
| from studentcomplextab10k t, studentcomplextab10k_map m |
| where t.id = m.tid and t.name is not null and m.mkey = 'name';\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Read arrays from the complex table. |
| 'num' => 3, |
| 'pig' => q\ |
| a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate s.name as n1, flatten(a); |
| c = filter b by n1 != '' ; |
| store c into ':OUTPATH:';\, |
| 'sql' => q\select t.name, m.lvalue |
| from studentcomplextab10k t, studentcomplextab10k_list m |
| where t.id = m.tid and t.name is not null;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Write tuples to a complex table, then read them back. |
| # This also tests reading with default separators. |
| 'num' => 4, |
| 'notmq' => 1, |
| 'hcat_prep' => q\drop table if exists pig_complex_4; |
| create table pig_complex_4 (s struct<name: string, age: int, gpa: double>) STORED AS TEXTFILE;\, |
| 'pig' => q\ |
| a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate TOTUPLE(name, age, gpa) as s; |
| store b into 'pig_complex_4' using org.apache.hcatalog.pig.HCatStorer(); |
| exec; |
| c = load 'pig_complex_4' using org.apache.hcatalog.pig.HCatLoader(); |
| d = foreach c generate flatten(s); |
| store d into ':OUTPATH:';\, |
| 'sql' => q\select name, age, gpa from studenttab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Write maps to a complex table, then read them back. |
| # This also tests reading with default separators. |
| 'num' => 5, |
| 'notmq' => 1, |
| 'hcat_prep' => q\drop table if exists pig_complex_5; |
| create table pig_complex_5 (m map<string, string>) STORED AS TEXTFILE;\, |
| 'pig' => q\ |
| a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate TOMAP('name', name, 'age', (chararray)age, 'gpa', (chararray)gpa) as m; |
| store b into 'pig_complex_5' using org.apache.hcatalog.pig.HCatStorer(); |
| exec; |
| c = load 'pig_complex_5' using org.apache.hcatalog.pig.HCatLoader(); |
| d = foreach c generate m#'name', m#'age', m#'gpa'; |
| store d into ':OUTPATH:';\, |
| 'sql' => q\select name, age, gpa from studenttab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| { |
| # Write bags to a complex table, then read them back. |
| # This also tests reading with default separators. |
| 'num' => 6, |
| 'notmq' => 1, |
| 'hcat_prep' => q\drop table if exists pig_complex_6; |
| create table pig_complex_6 (a array<string>) STORED AS TEXTFILE;\, |
| 'pig' => q\ |
| a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); |
| b = foreach a generate name; |
| c = distinct b; |
| d = group c all; |
| e = foreach d generate $1 as a; |
| store e into 'pig_complex_6' using org.apache.hcatalog.pig.HCatStorer(); |
| exec; |
| f = load 'pig_complex_6' using org.apache.hcatalog.pig.HCatLoader(); |
| g = foreach f generate flatten(a); |
| store g into ':OUTPATH:';\, |
| 'sql' => q\select distinct name from studenttab10k;\, |
| 'floatpostprocess' => 1, |
| 'delimiter' => ' ', |
| }, |
| ], |
| }, # end of group Pig_Complex |
| ] |
| } |