# blob: 5b0ad9bcc964193a22765eb626941f3ae7465ec6 [file] [log] [blame]
#!/usr/bin/env perl
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
###############################################################################
#
# To get an outline of this test conf file, run the following (substitute this
# file's own name for hcat.conf if it differs):
# egrep '^#|name.*=>' hcat.conf | egrep -v '^#!|egrep' | less
#
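# Has a couple of Hive set directives:
# set hive.exec.dynamic.partition.mode=nonstrict;
# set hive.exec.dynamic.partition=true;
# NOTE(review): neither directive appears in any test in this file -- confirm
# whether this note still applies here.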
# Test definitions for the 'Pig' driver.
#
# Layout of $cfg:
#   driver - name of the test driver ('Pig').
#   groups - list of test groups; each group has a 'name' and a list of 'tests'.
#
# Keys that appear on the individual tests in this file:
#   num          - test number within its group.
#   hcat_prep    - HiveQL DDL (drop/create/alter) for the tables the test uses.
#   pig          - the Pig Latin script under test.
#   sql          - SQL text (one string, or a list of strings) selecting the
#                  same data the Pig script produces; presumably the harness
#                  uses it to build the expected result -- confirm against
#                  the driver.
#   result_table - table name(s) the Pig script stores into; '?' appears in
#                  the list position whose store goes to :OUTPATH: instead.
#   depends_on   - written here as <group name>_<num> of an earlier test.
#   floatpostprocess, delimiter, notmq, ignore, expected_err_regex
#                - options interpreted by the driver; their exact semantics
#                  are not visible in this file.
#
# :INPATH: and :OUTPATH: are placeholders, presumably substituted by the
# harness before the script runs -- confirm against the driver.
#
# NOTE(review): $cfg is assigned without 'my'/'our'. Presumably the code that
# loads this file relies on that; confirm before adding a declaration.
#
# The multi-line q\...\, q?...? and q:...: strings are kept flush-left on
# purpose: any indentation added inside them would become part of the text.
$cfg = {
  'driver' => 'Pig',
  'groups' => [
    # This first group should be moved to deployer ?
    {
      'name' => 'Pig_Checkin',
      'tests' => [
        {
          # Copy studenttab10k into a new table via HCatLoader/HCatStorer.
          'num' => 1
          ,'hcat_prep'=>q\drop table if exists pig_checkin_1;
create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\
          ,'result_table' => 'pig_checkin_1'
          ,'sql' => q\select * from studenttab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Join two tables that are both read through HCatLoader.
          'num' => 2
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = load 'votertab10k' using org.apache.hcatalog.pig.HCatLoader();
c = join a by name, b by name;
store c into ':OUTPATH:';\
          ,'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);']
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Join an HCatLoader table with a plain load from :INPATH:.
          'num' => 3
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float);
c = join a by name, b by name;
store c into ':OUTPATH:';\
          ,'sql' => q\select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Split by age and store each half into its own table.
          'num' => 4
          ,'hcat_prep'=>q\drop table if exists pig_checkin_4_1;
drop table if exists pig_checkin_4_2;
create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE;
create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
split a into b if age <=40, c if age > 40;
store b into 'pig_checkin_4_1' using org.apache.hcatalog.pig.HCatStorer();
store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\
          ,'result_table' => ['pig_checkin_4_1','pig_checkin_4_2']
          ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Split by age; one half goes to a table, the other to :OUTPATH:.
          'num' => 5
          ,'hcat_prep'=>q\drop table if exists pig_checkin_5;
create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
split a into b if age <=40, c if age > 40;
store b into 'pig_checkin_5' using org.apache.hcatalog.pig.HCatStorer();
store c into ':OUTPATH:';\
          ,'result_table' => ['pig_checkin_5','?']
          ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Store one partition, passing 'ds=20110924' to HCatStorer.
          'num' => 6
          ,'hcat_prep'=>q\drop table if exists pig_checkin_6;
create table pig_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110924';
c = foreach b generate name, age;
store c into 'pig_checkin_6' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\
          # Leftover commented-out Pig statement: dump a;
          ,'result_table' => 'pig_checkin_6'
          ,'sql' => "select name, age, ds from studentparttab30k where ds='20110924';"
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Store into a partitioned table with no partition spec given to
          # HCatStorer; ds is carried as a regular column of the relation.
          'num' => 7
          ,'hcat_prep'=>q\drop table if exists pig_checkin_7;
create table pig_checkin_7 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, ds;
store b into 'pig_checkin_7' using org.apache.hcatalog.pig.HCatStorer();\
          ,'result_table' => 'pig_checkin_7'
          ,'sql' => "select name, age, ds from studentparttab30k;"
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        }
      ],
    }, # end of group Pig_Checkin
    {
      'name' => 'Pig_Read',
      'tests' => [
        {
          'ignore' => 1, # Need to checkin HCATALOG-168.
          'num' => 1
          ,'pig' => q\a = load 'all100k' using org.apache.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\
          ,'sql' => q\select * from all100k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Read three columns from the table named all100kjson.
          'num' => 2
          ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate s, i, d;
store b into ':OUTPATH:';\
          ,'sql' => q\select s, i, d from all100kjson;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Read two columns from the table named all100krc.
          'num' => 3
          ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into ':OUTPATH:';\
          ,'sql' => q\select name, age from all100krc;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # A table with one partition in text and one in rc
          # (q?...? is used here because the DDL contains a backslash, '\t').
          'num' => 4
          ,'hcat_prep'=>q?drop table if exists pig_read_4;
create external table pig_read_4 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
alter table pig_read_4 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_read_4 set fileformat rcfile;
alter table pig_read_4 add partition (b='2') location '/user/hcat/tests/data/all100krc';?
          ,'pig' => q\a = load 'pig_read_4' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, b;
store b into ':OUTPATH:';\
          ,'sql' => q\(select name, age, 1 from studenttab10k)
union all
(select name, age, 2 from all100krc);\
        },
        {
          # Read from a table in the non-default database
          'num' => 5
          ,'hcat_prep'=>q?create database if not exists pig_db_1;
drop table if exists pig_db_1.pig_read_5;
create external table pig_db_1.pig_read_5 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
use pig_db_1;
alter table pig_read_5 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';?
          ,'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, b;
store b into ':OUTPATH:';\
          ,'sql' => q\select name, age, 1 from studenttab10k;\
        }
      ],
    }, # end of group Pig_Read
    {
      'name' => 'Pig_Write',
      'tests' => [
        {
          'ignore' => 1, # Need to checkin HCATALOG-168.
          'num' => 1
          ,'hcat_prep'=>q\drop table if exists pig_write_1;
create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\
          ,'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray);
store a into 'pig_write_1' using org.apache.hcatalog.pig.HCatStorer();\
          ,'result_table' => 'pig_write_1'
          ,'sql' => q\select * from all100k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Store into a table declared with the JsonSerDe row format.
          'num' => 2
          ,'hcat_prep'=>q\drop table if exists pig_write_2;
create table pig_write_2(
s string,
i int,
d double,
m map<string, string>,
bb array<struct<a: int, b: string>>)
row format serde 'org.apache.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE;
\
          ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate s, i, d;
store b into 'pig_write_2' using org.apache.hcatalog.pig.HCatStorer();\
          ,'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\
          ,'result_table' => 'pig_write_2'
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Store into a table declared 'stored as rcfile'.
          'num' => 3
          ,'hcat_prep'=>q\drop table if exists pig_write_3;
create table pig_write_3(
name string,
age int,
gpa double)
stored as rcfile;
\
          ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into 'pig_write_3' using org.apache.hcatalog.pig.HCatStorer();\
          ,'sql' => q\select name, age from all100krc;\
          ,'result_table' => 'pig_write_3'
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Store in a sequence file
          # ($2 below is Pig's positional reference; q// does not interpolate.)
          'num' => 4
          ,'hcat_prep'=>q\drop table if exists pig_write_4;
create table pig_write_4(
name string,
age int,
gpa double)
stored as sequencefile;
\
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, 0.1;
c = foreach b generate name, age, $2 as gpa;
store c into 'pig_write_4' using org.apache.hcatalog.pig.HCatStorer();\
          ,'sql' => q\select name, age, 0.1 from studenttab10k;\
          ,'result_table' => 'pig_write_4'
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # Write to a table in the non-default database
          # The table is dropped first, like every other hcat_prep in this
          # file, so a rerun does not store into a table that still holds the
          # previous run's rows.
          'num' => 5
          ,'hcat_prep'=>q?create database if not exists pig_db_1;
drop table if exists pig_db_1.pig_write_5;
create table if not exists pig_db_1.pig_write_5 (name string, age int) row format delimited fields terminated by '\t' stored as textfile;?
          ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into 'pig_db_1.pig_write_5' using org.apache.hcatalog.pig.HCatStorer();\
          ,'sql' => q\select name, age from studenttab10k;\
          ,'result_table' => 'pig_db_1.pig_write_5'
        }
      ],
    }, # end of group Pig_Write
    {
      'name' => 'Pig_Change_Schema',
      'tests' => [
        {
          # I don't like this, I'm using one test to setup for the next. But I don't know how else to do this.
          'num' => 1
          ,'hcat_prep'=>q\drop table if exists pig_change_schema_1;
create table pig_change_schema_1 (name string) partitioned by (ds string) STORED AS TEXTFILE;\
          ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110924';
c = foreach b generate name;
store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\
          ,'result_table' => 'pig_change_schema_1'
          ,'sql' => q\select name, ds from studentparttab30k where ds='20110924';\
        },
        {
          # I don't like this, I'm using one test to setup for the next. But I don't know how else to do this.
          # Adds a column to the table created by test 1, then stores a
          # second partition that includes the new column.
          'num' => 2
          ,'depends_on' => 'Pig_Change_Schema_1'
          ,'hcat_prep'=>q\alter table pig_change_schema_1 add columns (age int);\
          ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110925';
c = foreach b generate name, age;
store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110925');\
          ,'result_table' => 'pig_change_schema_1'
          ,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
union all
(select name, age, ds from studentparttab30k where ds = '20110925');\
        },
        {
          # I don't like this, I'm using one test to setup for the next. But I don't know how else to do this.
          # Reads back the table written by tests 1 and 2.
          'num' => 3
          ,'depends_on' => 'Pig_Change_Schema_2'
          ,'pig' => q\a = load 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatLoader();
c = foreach a generate name, age, ds;
store c into ':OUTPATH:';\
          ,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
union all
(select name, age, ds from studentparttab30k where ds = '20110925');\
        }
      ],
    }, # end of group Pig_Change_Schema
    {
      'name' => 'Pig_HBase',
      'tests' => [
        {
          # Store into an HBase-backed table, then read it back in the same
          # script (after 'exec') and store the rows to :OUTPATH:.
          'num' => 1
          ,'hcat_prep'=>q\drop table if exists pig_hbase_1;
create table pig_hbase_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');\
          ,'pig' => q\set hcat.hbase.output.bulkMode 'false'
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float);
b = group a by name;
c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa;
d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa;
store d into 'pig_hbase_1' using org.apache.hcatalog.pig.HCatStorer();
exec
e = load 'pig_hbase_1' using org.apache.hcatalog.pig.HCatLoader();
store e into ':OUTPATH:';\
          ,'result_table' => ['pig_hbase_1','?']
          ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;' ]
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # multiquery
          'num' => 2
          ,'hcat_prep'=>q\drop table if exists pig_hbase_2_1;
create table pig_hbase_2_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');
drop table if exists pig_hbase_2_2;
create table pig_hbase_2_2(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');
\
          ,'pig' => q\set hcat.hbase.output.bulkMode 'false'
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float);
b = group a by name;
c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa;
d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa;
store d into 'pig_hbase_2_1' using org.apache.hcatalog.pig.HCatStorer();
store d into 'pig_hbase_2_2' using org.apache.hcatalog.pig.HCatStorer();\
          ,'result_table' => ['pig_hbase_2_1','pig_hbase_2_2']
          ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;']
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        }
      ],
    }, # end of group Pig_HBase
    {
      'name' => 'Pig_HCat_Barrier',
      'tests' => [
        {
          # Store into a CLUSTERED BY table; the test carries an
          # expected_err_regex of 'not supported'.
          'num' => 1
          ,'hcat_prep'=>q\drop table if exists pig_hcat_barrier_1;
create table pig_hcat_barrier_1 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\
          ,'pig' => q\
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
store a into 'pig_hcat_barrier_1' using org.apache.hcatalog.pig.HCatStorer('b=1'); \
          ,'expected_err_regex' => 'not supported'
        },
        {
          # Same as test 1, with SORTED BY added to the table definition.
          'num' => 2
          ,'hcat_prep'=>q\drop table if exists pig_hcat_barrier_2;
create table pig_hcat_barrier_2 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) SORTED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\
          ,'pig' => q\
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
store a into 'pig_hcat_barrier_2' using org.apache.hcatalog.pig.HCatStorer('b=1'); \
          ,'expected_err_regex' => 'not supported'
        },
      ],
    }, # end of group Pig_HCat_Barrier
    {
      'name' => 'Pig_HCAT_COOP',
      'tests' => [
        {
          # test if Pig can load the table after various table schema change
          'num' => 1
          ,'hcat_prep'=>q:drop table if exists pig_hcat_coop_1;
create external table pig_hcat_coop_1 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE;
alter table pig_hcat_coop_1 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_hcat_coop_1 partition(b='1') set fileformat TEXTFILE;
alter table pig_hcat_coop_1 change gpa registration string;
alter table pig_hcat_coop_1 add columns (contributions float);
alter table pig_hcat_coop_1 add partition (b='2') location '/user/hcat/tests/data/votertab10k';
alter table pig_hcat_coop_1 partition(b='2') set fileformat TEXTFILE;
alter table pig_hcat_coop_1 replace columns (name string, age int);
:
          ,'pig' => q\
a = load 'pig_hcat_coop_1' using org.apache.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\
          ,'sql' => q\select name, age, '1' from studenttab10k union all select name, age, '2' from votertab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test if Pig can load table after fileformat change and table schema change
          'num' => 2
          ,'hcat_prep'=>q:drop table if exists pig_hcat_coop_2;
create external table pig_hcat_coop_2 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE;
alter table pig_hcat_coop_2 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_hcat_coop_2 partition(b='1') set fileformat TEXTFILE;
alter table pig_hcat_coop_2 add partition (b='2') location '/user/hcat/tests/data/all100krc';
alter table pig_hcat_coop_2 partition(b='2') set fileformat RCFILE;
alter table pig_hcat_coop_2 replace columns (age int, name string);
:
          ,'pig' => q\
a = load 'pig_hcat_coop_2' using org.apache.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\
          ,'sql' => q\select age, name, '1' from studenttab10k union all select age, name, '2' from all100krc;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        }
      ],
    }, # end of group Pig_HCAT_COOP
    {
      'name' => 'Pig_Complex',
      'tests' => [
        {
          # test reading tuples from the complex table
          'num' => 1
          ,'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate flatten(s);
store b into ':OUTPATH:';\
          ,'sql' => q\select IFNULL(name, ""), IFNULL(age, ""), IFNULL(gpa, "") from studentcomplextab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test reading maps from the complex table
          'num' => 2
          ,'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate s.name as n1, m#'name' as n2;
c = filter b by n1 != '' and n2 is not null;
store c into ':OUTPATH:';\
          ,'sql' => q\select t.name, m.mvalue
from studentcomplextab10k t, studentcomplextab10k_map m
where t.id = m.tid and t.name is not null and m.mkey = 'name';\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test reading arrays from the complex table
          'num' => 3
          ,'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate s.name as n1, flatten(a);
c = filter b by n1 != '' ;
store c into ':OUTPATH:';\
          ,'sql' => q\select t.name, m.lvalue
from studentcomplextab10k t, studentcomplextab10k_list m
where t.id = m.tid and t.name is not null;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test writing tuples to a complex table. This also tests reading with default separators.
          'num' => 4
          ,'notmq' => 1
          ,'hcat_prep'=>q\drop table if exists pig_complex_4;
create table pig_complex_4 (s struct<name: string, age: int, gpa: double>) STORED AS TEXTFILE;\
          ,'pig' => q\
a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate TOTUPLE(name, age, gpa) as s;
store b into 'pig_complex_4' using org.apache.hcatalog.pig.HCatStorer();
exec;
c = load 'pig_complex_4' using org.apache.hcatalog.pig.HCatLoader();
d = foreach c generate flatten(s);
store d into ':OUTPATH:';\
          ,'sql' => q\select name, age, gpa from studenttab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test writing maps to a complex table. This also tests reading with default separators.
          'num' => 5
          ,'notmq' => 1
          ,'hcat_prep'=>q\drop table if exists pig_complex_5;
create table pig_complex_5 (m map<string, string>) STORED AS TEXTFILE;\
          ,'pig' => q\
a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate TOMAP('name', name, 'age', (chararray)age, 'gpa', (chararray)gpa) as m;
store b into 'pig_complex_5' using org.apache.hcatalog.pig.HCatStorer();
exec;
c = load 'pig_complex_5' using org.apache.hcatalog.pig.HCatLoader();
d = foreach c generate m#'name', m#'age', m#'gpa';
store d into ':OUTPATH:';\
          ,'sql' => q\select name, age, gpa from studenttab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        },
        {
          # test writing bags to a complex table. This also tests reading with default separators.
          'num' => 6
          ,'notmq' => 1
          ,'hcat_prep'=>q\drop table if exists pig_complex_6;
create table pig_complex_6 (a array<string>) STORED AS TEXTFILE;\
          ,'pig' => q\
a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
b = foreach a generate name;
c = distinct b;
d = group c all;
e = foreach d generate $1 as a;
store e into 'pig_complex_6' using org.apache.hcatalog.pig.HCatStorer();
exec;
f = load 'pig_complex_6' using org.apache.hcatalog.pig.HCatLoader();
g = foreach f generate flatten(a);
store g into ':OUTPATH:';\
          ,'sql' => q\select distinct name from studenttab10k;\
          ,'floatpostprocess' => 1
          ,'delimiter' => ' '
        }
      ],
    } # end of group Pig_Complex
  ]
}