#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
# Nightly tests for pig.
#
#
#PigSetup::setup();
#my $me = `whoami`;
#chomp $me;
# Command-line test configuration for Pig (describe, unicode, warning and
# nested-describe checks).
#
# The whole file is a single assignment to the package variable $cfg; there
# is no `my`.  NOTE(review): presumably the test driver evaluates this file
# and then reads $cfg -- confirm before adding a lexical declaration or
# strictures.
#
# Layout:
#   driver, nummachines -- top-level settings
#   groups              -- list of test groups; each group has a 'name', a
#                          'tests' list and optional per-group settings
#   tests               -- each test has a 'num', a 'pig' script and either
#                          'expected_out_regex' or 'expected_err_regex'
#
# Pig scripts are quoted with q\...\ (backslash as delimiter), so they may
# contain quotes, braces, parentheses and '$' freely but never a backslash.
# Everything between the delimiters -- including leading whitespace and
# newlines -- is part of the string, which is why the script text is kept
# flush-left.
#
# The :INPATH:, :OUTPATH: and :FUNCPATH: tokens in the scripts, and the
# :<Group>_<num>_output: / :<Group>_<num>_err: tokens used as expectations,
# look like placeholders filled in by the harness -- TODO confirm.
$cfg = {
    driver      => 'Pig',
    nummachines => 5,
    groups      => [

        # ------------------------------------------------------------------
        # describe issued from the command line
        # ------------------------------------------------------------------
        {
            name             => 'Describe_cmdline',
            floatpostprocess => 0,
            delimiter        => ' ',
            tests            => [

                # JIRA[PIG-372]
                # JIRA[PIG-374]
                # JIRA[PIG-384]
                # Fields declared without a type are expected to be
                # described as bytearray.
                {
                    num => 1,
                    pig => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
describe A;\,
                    expected_out_regex => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                },

                # JIRA[PIG-19], commented out until fixed.
                # {
                #     'num' => 2,
                #     'pig' => q\
#A=load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
#describe A;\,
                #     'expected_out_regex' => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                # },

                # JIRA[PIG-373]
                # Explicitly typed fields keep their declared types.
                {
                    num => 3,
                    pig => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
describe A;\,
                    expected_out_regex => "A: {name: chararray,age: int,gpa: double}",
                },

                # ********************************************************
                # QUESTION: should this be an SQL verifier for the DUMP
                # statement?
                # ********************************************************
                # # JIRA[PIG-373]
                # {
                #     'num' => 4,
                #     'java_params' => ['-Dopt.fetch=false'],
                #     'pig' => q\
#A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
#describe A;
#A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
#dump A;\,
                #
                #     'sql' => "select name, age, gpa from studenttab10k;",
                # },

                # JIRA[PIG-373]
                # JIRA[PIG-405]
                # Expected:
                #   A: {m: map[],x: bytearray,y: bytearray}
                {
                    num => 5,
                    pig => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;\,
                    expected_out_regex => "A: {m: map\\[\\],x: bytearray,y: bytearray}",
                },

                # JIRA[PIG-373]
                # JIRA[PIG-405]
                # Both describes come after the foreach.  Expected, in order:
                #   A: {m: map[],x: bytearray,y: bytearray}
                #   B: {m: map[]}
                {
                    num => 6,
                    pig => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe A;
describe B;\,
                    expected_out_regex => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },

                # JIRA[PIG-373]
                # JIRA[PIG-405]
                # describe A is issued before B is defined.  Expected, in
                # order:
                #   A: {m: map[],x: bytearray,y: bytearray}
                #   B: {m: map[]}
                {
                    num => 7,
                    pig => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;
B= foreach A generate m;
describe B;\,
                    expected_out_regex => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },

                # JIRA[PIG-373]
                # JIRA[PIG-405]
                # Same aliases described in reverse order.  Expected, in
                # order:
                #   B: {m: map[]}
                #   A: {m: map[],x: bytearray,y: bytearray}
                {
                    num => 8,
                    pig => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe B;
describe A;\,
                    expected_out_regex => "B: {m: map\\[\\]}\nA: {m: map\\[\\],x: bytearray,y: bytearray}",
                },

                # Schema that includes a boolean column.  (This script has
                # no leading newline after the opening delimiter.)
                {
                    num => 14,
                    pig => q\A = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
describe A;\,
                    expected_out_regex => "A: {name: chararray,age: int,gpa: double,instate: boolean}",
                },

                # JIRA[PIG-379]
                # Each alias is described right after it is defined.
                # Expected, in order:
                #   A: {name: chararray,age: int,gpa: float}
                #   B: {name: chararray,age: int}
                #   C: {name: chararray,age: int}
                #   D: {group: chararray,C: {(name: chararray,age: int)}}
                {
                    num => 9,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
describe A;
B= foreach A generate name, age;
describe B;
C= filter B by age > 30;
describe C;
D= group C by name;
describe D;\,
                    expected_out_regex =>
"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}",
                },

                # All aliases are defined first, then described A..D.
                # Expected output is the same four schemas as test 9.
                # NOTE(review): unlike tests 9 and 11, this pattern ends
                # with a newline -- confirm whether that is intentional.
                {
                    num => 10,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe A;
describe B;
describe C;
describe D;\,
                    expected_out_regex =>
"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}\n",
                },

                # All aliases are defined first, then described D..A.
                # Expected, in order:
                #   D: {group: chararray,C: {(name: chararray,age: int)}}
                #   C: {name: chararray,age: int}
                #   B: {name: chararray,age: int}
                #   A: {name: chararray,age: int,gpa: float}
                {
                    num => 11,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe D;
describe C;
describe B;
describe A;\,
                    expected_out_regex =>
"D: {group: chararray,C: {\\(name: chararray,age: int\\)}}\nC: {name: chararray,age: int}\nB: {name: chararray,age: int}\nA: {name: chararray,age: int,gpa: float}",
                },

                # Negative case: alias X is never defined in the script, and
                # the test expects the ERROR 1003 message rather than any
                # schema output.
                {
                    num => 12,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe X;
describe D;
describe C;
describe B;
describe A;\,
                    expected_err_regex => "ERROR 1003: Unable to find an operator for alias X",
                },

                # Flattened cogroup with typed null-bag fallbacks.
                # Expected:
                #   D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}
                # NOTE(review): 'voter_data' carries no :INPATH: prefix,
                # unlike every other load here; the script only issues
                # describe -- confirm this is intended.
                {
                    num => 13,
                    pig => q\
A = LOAD ':INPATH:/singlefile/studenttab10k' AS (name: chararray, age: int, gpa: float);
B = LOAD 'voter_data' AS (name: chararray, age: int, registration: chararray, contributions: float);
C = COGROUP A BY name, B BY name;
D = FOREACH C GENERATE group, flatten((not IsEmpty(A) ? A : (bag{tuple(chararray, int, float)}){(null, null, null)})), flatten((not IsEmpty(B) ? B : (bag{tuple(chararray, int, chararray, float)}){(null,null,null, null)}));
describe D;\,
                    expected_out_regex => "D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}",
                },
            ],
        },

        # ------------------------------------------------------------------
        # dump of unicode data from the command line
        # ------------------------------------------------------------------
        {
            name             => 'Unicode_cmdline',
            floatpostprocess => 0,
            delimiter        => ' ',
            tests            => [
                {
                    num         => 1,
                    java_params => ['-Dopt.fetch=false'],
                    pig         => q\
A = load ':INPATH:/singlefile/unicode100' as (name:chararray);
dump A;\,
                    expected_out_regex => ":Unicode_cmdline_1_output:",
                },
            ],
        },

        # ------------------------------------------------------------------
        # warning messages
        # ------------------------------------------------------------------
        {
            name             => 'Warning',
            floatpostprocess => 0,
            # Warnings use counters, which don't work in local mode
            execonly         => 'mapred,tez,spark',
            delimiter        => ' ',
            tests            => [

                # Checking divide by zero warning
                {
                    num => 1,
                    pig => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is null? 0 :norm_gpa);
store c into ':OUTPATH:';\,
                    expected_err_regex => ":Warning_1_err:",
                },

                # Checking field discarded warning
                {
                    num => 2,
                    pig => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age:int, gpa:double);
b = foreach a generate (int)name;
store b into ':OUTPATH:';\,
                    expected_err_regex => "Encountered Warning FIELD_DISCARDED_TYPE_CONVERSION_FAILED 10000 time.*",
                },

                # Checking type cast warnings; the pattern accepts the INT
                # and FLOAT warnings in either order.
                {
                    num => 3,
                    pig => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age, gpa);
b = foreach a generate age + 1, gpa + 0.1f;
describe b;\,
                    expected_err_regex => "(Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*)|(Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*)",
                },

                # Checking udf warnings
                {
                    num => 4,
                    pig => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age: int, gpa: float);
b = foreach a generate org.apache.pig.test.udf.evalfunc.TestWarningFunc(name, age, gpa);
store b into ':OUTPATH:';\,
                    expected_err_regex => ":Warning_4_err:",
                },

                # Checking non existent field warnings
                {
                    num => 5,
                    pig => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k';
b = foreach a generate $3;
store b into ':OUTPATH:';\,
                    expected_err_regex => "Encountered Warning ACCESSING_NON_EXISTENT_FIELD 10000 time.*",
                },
            ],
        },

        # ------------------------------------------------------------------
        # describe on aliases produced by a nested foreach
        #
        # Scenarios referred to by number in the per-test comments below:
        #  1  A nested foreach gives instant feedback after the user issues
        #     the foreach statement in Grunt.
        #  2  A nested foreach gives instant feedback as part of execution:
        #     when the foreach statement runs, the schema of the nested
        #     alias is dumped.
        #  3  Describe a single alias resulting from a nested foreach.
        #  4  Describe multiple aliases resulting from a nested foreach.
        #  5  Describe an alias resulting from a nested foreach that
        #     contains a positional parameter.
        #  6  Describe a child alias resulting from a nested foreach where
        #     the child alias had multiple assignments.  The expected
        #     behavior is that the last assignment determines the result of
        #     the describe statement.
        #  7  Describe an alias resulting from a nested foreach where the
        #     projection for the nested alias is empty.
        #  8  Describe within a foreach statement.
        #  9  Describe an alias with complex data types.
        # 10  Describe that references an alias from an AS clause.
        # ------------------------------------------------------------------
        {
            name  => 'NestedDescribe',
            tests => [

                # Scenarios 2 and 3.
                {
                    num => 1,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; generate COUNT(D), group;}
describe C::D;
\,
                    expected_out_regex => "D: {age: bytearray}",
                },

                # Scenarios 2 and 7.
                {
                    num => 2,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; E= filter D by age > 1000; generate COUNT(E), group;}
describe C;
\,
                    expected_out_regex => "C: {long,group: bytearray}",
                },

                # Scenarios 1, 2, 3, 4 and 5 (the nested alias is built from
                # the positional reference A.$1).
                {
                    num => 3,
                    pig => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.$1; generate COUNT(D), group;}
describe C::D;
\,
                    expected_out_regex => "D: {age: bytearray}",
                },
            ],
        },
    ],
};