blob: 8663f289db069c7e8c45d9eae0cf94875b3d1906 [file] [log] [blame]
#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
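# Nightly negative (error-path) tests for Pig: each test pairs a Pig script
# with a regex for the error message it is expected to produce.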
#
#
#PigSetup::setup();
# $cfg describes this suite: a 'driver' name, a 'nummachines' count and a list
# of 'groups'.  Each group has a 'name' and a list of 'tests'; each test has a
# 'num', a 'pig' script and an 'expected_err_regex', plus optional
# 'java_params', 'execonly' and 'ignore' entries.
#
# NOTE(review): $cfg is assigned without `my`; presumably the harness that
# loads this file reads the package global by name -- confirm before changing.
#
# The multi-line Pig scripts are q\...\ / q#...# literals.  Their continuation
# lines start at column 0 on purpose: indenting them would change the string
# value.  Never place a comment between an opening q-delimiter and its close.
$cfg = {
    'driver' => 'Pig',
    'nummachines' => 5,
    'groups' => [
        {
            'name' => 'HadoopError',
            'tests' => [
                {
                    # COUNT($5) refers to a column position that does not exist.
                    'num' => 1,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, COUNT($5);
dump c;\,
                    'expected_err_regex' => "Out of bound access. Trying to access non-existent column: 5.",
                },
            ],
        },

        {
            'name' => 'NoSuchFile',
            'tests' => [
                {
                    # Load from an input path that is not there.
                    'num' => 1,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\a = load '/user/gates/nosuchfile'; dump a;\,
                    'expected_err_regex' => "ERROR 2118: Input path does not exist",
                },
                {
                    # Register a jar file that is not there.
                    'num' => 2,
                    'pig' => q\register bla.jar\,
                    'expected_err_regex' => "ERROR 101: file 'bla.jar' does not exist.",
                },
            ],
        },

        {
            'name' => 'BadFunc',
            'tests' => [
                {
                    # PIG-431: load using a function name that cannot be resolved.
                    'num' => 1,
                    'pig' => "a = load ':INPATH:/singlefile/studenttab10k' using NoSuchFunction(':');",
                    'expected_err_regex' => "Could not resolve NoSuchFunction using imports",
                },
            ],
        },

        {
            'name' => 'FileExists',
            'tests' => [
                {
                    # Store into a location that the expected error reports as already existing.
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
store a into ':INPATH:/singlefile/fileexists';\,
                    'expected_err_regex' => ".* already exists",
                },
            ],
        },

        {
            # Operators that are (or once were) rejected inside a nested foreach block.
            'name' => 'NegForeach',
            'tests' => [
                {
                    # Nested foreach inside foreach.
                    # Marked 'ignore' because, per the original note, it is valid now.
                    'num' => 1,
                    'ignore' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { ba = filter a by age < '25'; bb = foreach ba generate gpa; generate group, flatten(bb);}\,
                    'expected_err_regex' => " Encountered \" \"foreach\" \"foreach \"\" at line.*\nWas expecting one of:",
                },
                {
                    # group inside foreach is not allowed.
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { ba = filter a by age < '25'; g = group ba by gpa; x = foreach g generate group, flatten(g); generate x;}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'ba'",
                },
                {
                    # cogroup inside foreach is not allowed.
                    'num' => 3,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by (name, age), b by (name, age) ;
d = foreach c { e = cogroup a by (name), b by (name); x= foreach a generate flatten(a), flatten(b); generate x;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    # join inside foreach is not allowed.
                    'num' => 4,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by (name), b by (name) ;
d = foreach c { e = join a by (age), b by (age); generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    # cross inside foreach.
                    # Marked 'ignore' because, per the original note, it is valid now.
                    'num' => 5,
                    'ignore' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by (name), b by (name) ;
d = foreach c { e = cross a,b; generate e;}\,
                    'expected_err_regex' => "Encountered \" \"cross\" \"cross \"\" at .*\nWas expecting one of",
                },
                {
                    # union inside foreach is not allowed.
                    'num' => 6,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by (name), b by (name) ;
d = foreach c { e = union a,b; generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    # split inside foreach is not allowed.
                    'num' => 7,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { split a into ba if age < '25', bb if age > '40'; generate group, COUNT(ba), COUNT(bb);}\,
                    'expected_err_regex' => "mismatched input 'split' expecting GENERATE",
                },
                {
                    # load inside foreach is not allowed.
                    'num' => 8,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b {
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
generate *;}\,
                    'expected_err_regex' => "mismatched input ''.*/singlefile/votertab10k'' expecting LEFT_PAREN",
                },
                {
                    # store inside foreach is not allowed.
                    'num' => 9,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { store a into ':OUTPATH:'; generate *;}\,
                    'expected_err_regex' => "mismatched input 'store' expecting GENERATE",
                },
            ],
        },

        {
            # Statements that are not allowed at all.
            'name' => 'NotAllowed',
            'tests' => [
                {
                    # Plain alias assignment (a = b;), not allowed as of 09/18/2008.
                    # Marked 'ignore': the error message differs between Hadoop versions.
                    'num' => 1,
                    'ignore' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = a;\,
                    'expected_err_regex' => "mismatched input ';' expecting LEFT_PAREN",
                },
                {
                    # Alias assignment with a schema (a = b as (x,y,z);), not allowed as of 09/18/2008.
                    # Marked 'ignore': the error message differs between Hadoop versions.
                    'num' => 2,
                    'ignore' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = a as (x,y,z);\,
                    'expected_err_regex' => "mismatched input 'as' expecting LEFT_PAREN",
                },
            ],
        },

        {
            # UDFs from testudf.jar that throw an exception.
            'name' => 'UdfException',
            'tests' => [
                {
                    # BadUdf
                    'num' => 1,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = cogroup a by name, b by name;
d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf(a,b));
store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                },
                {
                    # BadUdf2
                    'num' => 2,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = cogroup a by name, b by name;
d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf2(a,b));
store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                },
                {
                    # BadUdf3
                    'num' => 3,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = cogroup a by name, b by name;
d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf3(a,b));
store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                },
                {
                    # BadUdf4: the only one matched by error code rather than message text.
                    'num' => 4,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = cogroup a by name, b by name;
d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf4(a,b));
store d into ':OUTPATH:';\,
                    'expected_err_regex' => "ERROR 2078: .*",
                },
            ],
        },

        {
            # Syntax errors in streaming-related statements.
            'name' => 'SyntaxErrors',
            'tests' => [
                {
                    # The streaming command is not wrapped in backquotes.
                    # An earlier, stricter form of the expected regex is kept below for reference:
                    #'expected_err_regex' => "mismatched input ''BEGIN {FS = \"\\t\"; OFS = \"\\t\"} {print \$"."3, \$"."2, \$"."1}'' expecting SEMI_COLON",
                    'num' => 1,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q#
A = load ':INPATH:/singlefile/studenttab10k';
B = foreach A generate $2, $1, $0;
C = stream B through awk 'BEGIN {FS = "\t"; OFS = "\t"} {print $3, $2, $1}';
dump C;#,
                    'expected_err_regex' => "mismatched input ''BEGIN {.*} {.*}'' expecting SEMI_COLON",
                },
                {
                    # The input spec is missing its parentheses.
                    'num' => 2,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` input 'foo' using PigStorage() ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input ''foo'' expecting LEFT_PAREN",
                },
                {
                    # No serializer name follows 'using'.
                    'num' => 3,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` output ('foo' using );
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near '\\)'",
                },
                {
                    # The define statement has no alias name.
                    'num' => 4,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q#
define `perl PigStreaming.pl foo -`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input '`perl PigStreaming.pl foo -`' expecting IDENTIFIER",
                },
                {
                    # The file name inside ship(...) is not quoted.
                    'num' => 5,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q#
define CMD `perl PigStreaming.pl foo -` ship(:SCRIPTHOMEPATH:/PigStreaming.pl);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
                    'expected_err_regex' => "mismatched input '.' expecting RIGHT_PAREN",
                },
            ],
        },

        {
            # Streaming commands with missing or incorrect information.
            'name' => 'CmdErrors',
            'tests' => [
                {
                    # define names a command script that does not exist (autoship).
                    'num' => 1,
                    'execonly' => 'mapred,tez',
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # The ship clause names a file that does not exist.
                    'num' => 2,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl foo -` ship(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Invalid ship specification: .*/PigStreamingNotThere.pl.* does not exist!",
                },
                {
                    # The cache clause names a file that does not exist.
                    'num' => 3,
                    'pig' => q\
define CMD `perl PigStreaming.pl - - nameMap` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') cache(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl#NotThere');
A = load ':INPATH:/singlefile/studenttab10k';
B = foreach A generate $0;
C = stream B through CMD as (name);
D = group C by name;
E = foreach D generate group, COUNT(C);
dump E;\,
                    'expected_err_regex' => "Invalid cache specification: .*/PigStreamingNotThere.pl.*",
                },
                {
                    # The input serializer cannot be resolved.
                    'num' => 4,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreaming.pl foo -` input('foo' using SerializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Could not resolve SerializerNotThere using imports",
                },
                {
                    # The output deserializer cannot be resolved.
                    'num' => 5,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreaming.pl` output(stdout using DeserializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Could not resolve DeserializerNotThere using imports",
                },
                {
                    # stream.skippath is set to an invalid value.
                    'num' => 6,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
set stream.skippath 'foo';
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Invalid value for stream.skippath.*",
                },
                {
                    # The stream operator uses a command alias (CMD1) that was never defined.
                    'num' => 7,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD1;
dump B;\,
                    'expected_err_regex' => "pig script failed to validate: Undefined command-alias \\[CMD1\\]",
                },
                {
                    # The stream operator reads from a relation alias (C) that was never defined.
                    'num' => 8,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreaming.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream C through CMD;
dump B;\,
                    'expected_err_regex' => "Undefined alias: C",
                },
            ],
        },

        {
            # Failures of the streaming application itself.
            # For every test in this group stderr still has to be checked manually for now.
            'name' => 'StreamingErrors',
            'tests' => [
                {
                    # Application fails at the beginning of processing.
                    'num' => 1,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl start` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Application fails in the middle of processing.
                    'num' => 2,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl middle` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Application fails at the end of processing.
                    # Original note: bring logs to dfs.
                    'num' => 3,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
define CMD `perl PigStreamingBad.pl end` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Application fails at a random point during processing.
                    # Original note: bring logs to dfs.
                    'num' => 4,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
define CMD `perl DieRandomly.pl 10000 2` ship(':SCRIPTHOMEPATH:/DieRandomly.pl') stderr('CMD' limit 1);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
            ],
        },

        {
            # Errors while (de)serializing data for streaming.
            'name' => 'IOErrors',
            'tests' => [
                {
                    # Bad deserializer (DumpStreamerBad): expected to throw.
                    'num' => 1,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
define CMD `perl PigStreaming.pl` input(stdin) output(stdout using org.apache.pig.test.udf.streaming.DumpStreamerBad) ship(':SCRIPTHOMEPATH:/PigStreaming.pl');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Error reading output from Streaming binary",
                },
                {
                    # Bad serializer (StringStoreBad): expected not to resolve.
                    'num' => 2,
                    'execonly' => 'mapred,tez',
                    'pig' => q\
define CMD `perl PigStreamingDepend.pl` input(stdin using StringStoreBad) ship(':SCRIPTHOMEPATH:/PigStreamingDepend.pl', ':SCRIPTHOMEPATH:/PigStreamingModule.pm');
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through `perl PigStreaming.pl`;
C = stream B through CMD as (name, age, gpa);
D = foreach C generate name, age;
store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ],
        },

        {
            # Streaming error cases restricted to 'execonly' => 'local'.
            'name' => 'StreamingLocalErrors',
            'tests' => [
                {
                    # define names a command script that does not exist.
                    'num' => 1,
                    'execonly' => 'local',
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig' => q\
define CMD `perl PigStreamingNotThere.pl`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;\,
                    'expected_err_regex' => "Can't open perl script .*PigStreamingNotThere.pl.*: No such file or directory",
                },
                {
                    # Application fails at the beginning of processing.
                    # stderr still has to be checked manually for now.
                    'num' => 2,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl start`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Application fails in the middle of processing.
                    # stderr still has to be checked manually for now.
                    'num' => 3,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl middle`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Application is invoked with the 'end' argument (exit status 3 expected).
                    # Original note: bring logs to dfs.
                    # stderr still has to be checked manually for now.
                    'num' => 4,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl end`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Bad (de)serializer pair; the expected error is the input
                    # serializer PigDump failing to resolve.
                    'num' => 5,
                    'execonly' => 'local',
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` input(stdin using PigDump) output(stdout using org.apache.pig.test.udf.storefunc.DumpLoaderBad);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve PigDump using imports",
                },
                {
                    # Bad serializer (StringStoreBad): expected not to resolve.
                    'num' => 6,
                    'execonly' => 'local',
                    'pig' => q\
define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using StringStoreBad);
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through `perl PigStreaming.pl`;
C = stream B through CMD as (name, age, gpa);
D = foreach C generate name, age;
store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ],
        },

        {
            'name' => 'LineageErrors',
            'tests' => [
                {
                    # A UDF returns a bytearray which is then cast to an integer.
                    'num' => 1,
                    'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
d = foreach c generate $0#'alice young';
split d into e if $0 < 42, f if $0 >= 42;
store e into ':OUTPATH:';\,
                    'expected_err_regex' => "Received a bytearray from the UDF or Union from two different Loaders. Cannot determine how to convert the bytearray to int",
                },
            ],
        },
    ],
};