# blob: bbd66569ee0e4bc4c9add96110ec3b127b5cc1da [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
###############################################################################
# Nightly tests for pig.
#
# Author: Alan F. Gates (gates@)
# $Header:$
#
#use Yahoo::Miners::Test::PigSetup;
#PigSetup::setup();
#my $me = `whoami`;
#chomp $me;
$cfg = {
'driver' => 'Pig',
'groups' => [
{
# Group: macro definition and inline expansion.
# Every test pairs a script that defines and invokes a macro ('pig') with a
# macro-free script ('verify_pig_script') that spells out the same data flow by hand.
# NOTE(review): presumably the harness runs both scripts and compares their output — confirm against the Pig driver.
'name' => 'Macro_DefinitionAndInline',
'tests' => [
{
# simple macro, no args
'num' => 1,
'pig' => q#define simple_macro() returns void {
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';
}
simple_macro();#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';#,
},
{
# input args, no return
'num' => 2,
'pig' => q#define simple_macro(loadfile) returns void {
a = load '$loadfile' as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';
}
simple_macro(':INPATH:/singlefile/studenttab10k');#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';#,
},
{
# input args, return value
'num' => 3,
'pig' => q#define simple_macro(loadfile) returns b {
a = load '$loadfile' as (name, age, gpa);
$b = foreach a generate age, name;
}
x = simple_macro(':INPATH:/singlefile/studenttab10k');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';#,
},
{
# input args, filter on double and int, return value
'num' => 4,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
},
{
# Definition: multiple inputs, no output, multiple return values
# x = invocation with multiple inputs, no output, multiple return values
# Query based on FilterEq from nightly.conf
'num' => 5,
'pig' => q\define test (in1, in2) returns r1, r2 {
a = load '$in1' using PigStorage() as (name, age, gpa);
$r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
b = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
$r2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
}
x1, x2 = test(':INPATH:/singlefile/studenttab10k', ':INPATH:/singlefile/votertab10k');
store x1 into ':OUTPATH:.1' using PigStorage;
store x2 into ':OUTPATH:.2' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
store a1 into ':OUTPATH:.1' using PigStorage;
b = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
b2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
store b2 into ':OUTPATH:.2' using PigStorage;\,
'floatpostprocess' => 1,
# NOTE(review): the delimiter is a single space here, while the stores use PigStorage with no explicit separator — confirm the intended character.
'delimiter' => ' ',
},
{
# use positional parameters inside macro
'num' => 6,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
},
{
# Test nested macros
'num' => 7,
'pig' => q\define sum_it(in, relation, scol) returns d {
$d = foreach $in generate group, SUM($relation.$scol);
}
define group_it(in_relation, group_key, sum_col) returns c {
b = group $in_relation by $group_key ;
$c = sum_it(b, $in_relation, $sum_col);
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = group_it(a, 'name', 'age');
store x into ':OUTPATH:';\,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, SUM(a.age);
store c into ':OUTPATH:';#,
},
{
# single macro definition invoked multiple times
# The second invocation runs on its own relation (z), mirroring the
# separately loaded relation d in the verification script.
'num' => 8,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:.1';
z = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
y = simple_macro(z, '2.0', '50');
store y into ':OUTPATH:.2';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:.1';
d = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
e = filter d by gpa >= 2.0 and age <= 50;
f = foreach e generate age, name;
store f into ':OUTPATH:.2';#,
},
{
# macro arg used as function arg
'num' => 9,
'pig' => q#define simple_macro(loadfile, sep) returns b {
a = load '$loadfile' using PigStorage('$sep') as (name, age, gpa);
$b = foreach a generate age, name;
}
x = simple_macro(':INPATH:/singlefile/studentcolon10k', ':');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
b = foreach a generate age, name;
store b into ':OUTPATH:';#,
},
{
# Multiple returns via split in the data flow
'num' => 10,
'pig' => q\define test (in1) returns r1, r2 {
a = load '$in1' using PigStorage() as (name, age, gpa);
$r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
$r2 = filter a by name > 'fred';
}
x1, x2 = test(':INPATH:/singlefile/studenttab10k');
store x1 into ':OUTPATH:.1' using PigStorage;
store x2 into ':OUTPATH:.2' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
store a1 into ':OUTPATH:.1' using PigStorage;
a2 = filter a by name > 'fred';
store a2 into ':OUTPATH:.2' using PigStorage;\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
{
# parameter substitution at the top level
# 'pig_params' supplies $loadfile, which is referenced outside the macro body.
'num' => 11,
'pig_params' => ['-p', qq(loadfile='singlefile/studenttab10k')],
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/$loadfile' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
}
]
},
{
# Group: alias and parameter name scoping between macros and the calling script.
# Each test pairs a macro-based script ('pig') with a macro-free script
# ('verify_pig_script') that expresses the same filter/group/projection by hand.
'name' => 'Macro_Scope',
'tests' => [
{
# re-use of variable in macro and global scope
# (alias b is the macro's return alias and is also assigned in the main script)
'num' => 1,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns b {
$b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
b = foreach x generate age, name;
store b into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
},
{
#Definition where there is a name collision between parameters in parent/child macro
# (both macros declare in_relation and sum_col, return c and use the local alias b)
'num' => 2,
'pig' => q\define sum_it(in_relation, relation, sum_col) returns c {
b = foreach $in_relation generate group, SUM($relation.$sum_col);
$c = order b by $1;
}
define group_it(in_relation, group_key, sum_col) returns c {
b = group $in_relation by $group_key ;
$c = sum_it(b, $in_relation, $sum_col);
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = group_it(a, name, age);
store x into ':OUTPATH:';\,
# NOTE(review): the macro orders its result (order b by $1) but the script below
# does not; presumably the output comparison is order-insensitive — confirm.
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, SUM(a.age);
store c into ':OUTPATH:';#,
},
{
#Definition where there is a name collision between macro and returns value in main pig script
# (the macro returns c and the caller also assigns the result to c)
'num' => 3,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = simple_macro(a, '3.0', '40');
store c into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
}
]
},
{
# Group: schema of relations returned by macros.
# Each script ends in `describe b;` and the test checks the return code ('rc')
# plus a pattern ('expected_out_regex') for the described schema.
'name' => 'Macro_Schema',
'tests' => [
{
# macro that does not change the schema
'num' => 1,
'pig' => q\define test(in) returns a {
$a = filter $in by age > 30;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = test(a);
describe b;\,
'rc' => 0,
# Single-quoted on purpose: inside double quotes "\{" collapses to a bare "{",
# so the escape never reaches the regex engine and the brace is left unescaped.
'expected_out_regex'=> 'b: \{name: chararray,age: int,gpa: double}'
},
{
# macro that does change the schema
'num' => 2,
'pig' => q\define test(in) returns a {
$a = foreach $in generate name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = test(a);
describe b;\,
'rc' => 0,
# Single-quoted for the same reason as above: keep the backslash in front of "{".
'expected_out_regex'=> 'b: \{name: chararray}'
}
]
},
{
# Group: macros combined with other script features — comments inside a macro
# body, `register`, streaming `define`, and a `store` inside a macro.
'name' => 'Macro_Misc',
'tests' => [
{
#Comments in macro
'num' => 1,
'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
-- add a comment
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
},
{
#register
# The macro uses a UDF from testudf.jar, which is registered after the macro definition.
'num' => 2,
'pig' => q\define test (in) returns b {
$b = foreach $in generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
}
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = test(a);
store x into ':OUTPATH:';\,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
register :FUNCPATH:/testudf.jar;
b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
store b into ':OUTPATH:';#,
},
{
#define for streaming combines with define for macros
'num' => 3,
'pig' => q#define CMD `perl -ne 'print $_;'`;
define test(in) returns B {
$B = stream $in through CMD as (name, age, gpa);
}
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = test(A);
store x into ':OUTPATH:';#,
# Same script with the perl one-liner wrapped in double quotes instead of single quotes.
# NOTE(review): presumably selected by the harness on Windows — confirm.
'pig_win' => q#define CMD `perl -ne "print $_;"`;
define test(in) returns B {
$B = stream $in through CMD as (name, age, gpa);
}
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = test(A);
store x into ':OUTPATH:';#,
'verify_pig_script' => q#A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';#,
'floatpostprocess' => 1,
# NOTE(review): the delimiter is a single space here, while the store uses no explicit separator — confirm the intended character.
'delimiter' => ' '
},
{
#JIRA: PIG-2681
# The macro stores its own return alias; the test carries no verification
# script and no expected-output key.
'num' => 4,
'pig' => q\
define test (in,out) returns b {
a = load '$in' as (name, age, gpa);
$b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
store $b into '$out';
}
register :FUNCPATH:/testudf.jar;
x = test(':INPATH:/singlefile/studenttab10k',':OUTPATH:');
\,
}
]
},
{
# Group: macros brought in with `import` rather than defined in the script itself.
'name' => 'Macro_Import',
'tests' => [
{
# import a macro file and invoke simple_macro, which this script does not define
# NOTE(review): presumably macro1.pig defines simple_macro(in_relation, min_gpa, max_age)
# as the filter + projection written out in the verification script — confirm.
'num' => 1,
'pig' => q#import ':SCRIPTHOMEPATH:/macro1.pig';
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by gpa >= 3.0 and age <= 40;
c = foreach b generate age, name;
store c into ':OUTPATH:';#,
}
]
},
{
# Group: scripts whose macro definition or invocation is invalid.
# Instead of a verification script, each test supplies 'expected_err_regex',
# a pattern for the error text the script is expected to produce.
# Tests carrying an 'ignore' key point at the JIRA issue that explains why.
'name' => 'Macro_Error',
'tests' => [
{
# parameter names repeated
'num' => 1,
'ignore' => 'https://issues.apache.org/jira/browse/PIG-2247',
'pig' => q#define simple_macro(in_relation, min_gpa, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Multiple arguments min_gpa found"
},
{
# undefined parameter in macro
# ($max_age is used in the body but is not in the parameter list)
'num' => 2,
'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Macro inline failed for macro 'simple_macro'. Reason: Undefined parameter : max_age"
},
{
# name collision between arg and return value
'num' => 3,
'pig' => q#define simple_macro(in_relation, min_gpa, c) returns c {
b = filter $in_relation by gpa >= $min_gpa and age <= $c;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Multiple values found for c"
},
{
# keyword as macro name
'num' => 4,
'pig' => q#define foreach(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "mismatched input 'foreach' expecting IDENTIFIER"
},
{
# UDF as macro name
'num' => 5,
'ignore' => 'https://issues.apache.org/jira/browse/PIG-2248',
'pig' => q#define COUNT(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = COUNT(a, '3.0');
store x into ':OUTPATH:';#,
'expected_err_regex' => "macro name hides UDF COUNT"
},
{
# redefine a macro
'num' => 6,
'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
define simple_macro(in, min_age) returns d {
b = filter $in by age >= $min_age;
$d = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Duplicated macro name 'simple_macro'"
},
{
# invoke non-existent macro
'num' => 7,
'pig' => q#
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = nosuch_macro('a', '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Cannot expand macro 'nosuch_macro'. Reason: Macro must be defined before expansion."
},
{
# Specifies two returns, but only actually returns one
'num' => 8,
'pig' => q#define simple(in_relation, min_gpa) returns c,d {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x, y = simple(a, '3.0');
store x into ':OUTPATH:.1';
store y into ':OUTPATH:.2';#,
'expected_err_regex' => "Invalid macro definition: . Reason: Macro 'simple' missing return alias: d"
},
{
# syntax error in a macro, check for correct line number
# The misspelled keyword 'fiter' on the second line of the script is the
# intentional error that the "line 2" pattern refers to; do not correct it.
'num' => 9,
'pig' => q#define simple(in_relation, min_gpa) returns c {
b = fiter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple(a, '3.0');
store x into ':OUTPATH:';#,
'expected_err_regex' => "line 2"
},
{
# too many args passed to macro
'num' => 10,
'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Failed to expand macro 'simple_macro'. Reason: Expected number of parameters: 2 actual number of inputs: 3"
},
{
# return two values, but script only accepts 1
'num' => 11,
'pig' => q#define simple(in_relation, min_gpa) returns c,d {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
$d = foreach b generate name, age;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple(a, '3.0');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 2 actual number of return values: 1"
},
{
# return 1 value, but script expects 2
'num' => 12,
'pig' => q#define simple(in_relation, min_gpa) returns c {
b = filter $in_relation by gpa >= $min_gpa;
$c = foreach b generate age, name;
}
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x, y = simple(a, '3.0');
store x into ':OUTPATH:.1';
store y into ':OUTPATH:.2';#,
'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 1 actual number of return values: 2"
}
]
},
{
# Group: failures triggered by the `import` statement itself — a missing file
# and an imported file containing an invalid macro. Each test supplies
# 'expected_err_regex', a pattern for the expected error text.
'name' => 'Macro_Import_Error',
'tests' => [
{
# import non-existent file
'num' => 1,
'ignore' => 1, # different error message for different version of hadoop
'pig' => q#import 'nosuchfile';
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro('a', '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Failed to import file 'nosuchfile'. Reason: Can't find the Specified file nosuchfile"
},
{
# import a macro with a syntax error
# NOTE(review): presumably macro_bad1.pig holds the malformed definition of simple_macro — confirm.
'num' => 2,
'pig' => q#import ':SCRIPTHOMEPATH:/macro_bad1.pig';
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
'expected_err_regex' => "Invalid macro definition"
}
]
}
],
},
;
# import non-existent file, import script with error