blob: 4eb3ae10a0e596aa98392d355b5b8377b66e374f [file] [log] [blame]
#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
# Nightly tests for pig.
#
#
#PigSetup::setup();
#my $me = `whoami`;
#chomp $me;
$cfg = {
'driver' => 'Pig',
'nummachines' => 5,
'verify_with_pig' => 1,
'verify_pig_version' => 'old',
'groups' => [
{
'name' => 'Checkin',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store a into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age) ;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1;
store i into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
'sortArgs' => ['-t', ' ', '-k', '2,2n'],
}
]
},
{
'name' => 'LoaderDefaultDir',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/dir/studenttab10k' as (name, age, gpa);
store a into ':OUTPATH:';\,
},
]
},
{
'name' => 'LoaderPigStorageArg',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
store a into ':OUTPATH:';\,
},
{
# load with control character
'num' => 2,
'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
store a into ':OUTPATH:';#,
},
{
# load and store with control character
'num' => 3,
'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
store a into ':OUTPATH:.intermediate' using PigStorage('\\u0001');
b = load ':OUTPATH:.intermediate' using PigStorage('\\u0001') as (name, age, gpa);
store b into ':OUTPATH:'; #,
'notmq' => 1,
},
]
},
{
# Results doctored; if you change this query, you need to copy the
# expected results into test/nightly/benchmarks.
'name' => 'LoaderBinStorage',
'tests' => [
{
'num' => 1,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Swap(name, age), TOKENIZE((chararray)name), org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
store b into ':OUTPATH:.intermediate' using BinStorage();
c = load ':OUTPATH:.intermediate' using BinStorage();
store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
'notmq' => 1,
},
]
},
{
# Results doctored; if you change this query, you need to copy the
# expected results into test/nightly/benchmarks.
'name' => 'LoaderTextLoader',
'tests' => [
{
'num' => 1,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/textdoc' using TextLoader();
b = foreach a generate TOKENIZE((chararray)$0);
store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
},
]
},
{
'name' => 'FilterBoolean',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name == 'fred allen' and age > 50;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/dir/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name != 'fred allen' or age < 10;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by not (age == 50);
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter <= and >= for chararray, int and double
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter <= and >= for bytearray, long and float
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
b = filter a by age >= 40 and age <=50 and gpa >= 2.0f and gpa <= 3.0f and name >= 'bob' and name <= 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter < and > for chararray, int and double
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = filter a by age > 40 and age <50 and gpa > 2.0 and gpa < 3.0 and name > 'bob' and name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter < and > for bytearray, long and float
{
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
b = filter a by age > 40 and age <50 and gpa > 2.0f and gpa < 3.0f and name > 'bob' and name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter <= and >= for explicit cast for chararray, int and double
{
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (int)age >= 40 and (int)age <=50 and (double)gpa >= 2.0 and (double)gpa <= 3.0 and (chararray)name >= 'bob' and (chararray)name <= 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter <= and >= for explicit cast for bytearray, long and float
{
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (long)age >= 40 and (long)age <=50 and (float)gpa >= 2.0f and (float)gpa <= 3.0f and name >= 'bob' and name <= 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter < and > for explicit cast for chararray, int and double
{
'num' => 12,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test filter < and > for explicit cast for bytearray, long and float
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (long)age > 40 and (long)age <50 and (float)gpa > 2.0f and (float)gpa < 3.0f and name > 'bob' and name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test AND with nulls
{
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
b = filter a by name == 'fred allen' and age > 50;
store b into ':OUTPATH:' using PigStorage;\,
},
# test OR with nulls
{
'num' => 15,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
b = filter a by name != 'fred allen' or age < 10;
store b into ':OUTPATH:' using PigStorage;\,
},
# test with nulls filter <= and >= for chararray, int and double
{
'num' => 16,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
# test with nulls filter < and > for explicit cast for chararray, int and double
{
'num' => 17,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
{
# Filters on the untyped field 'instate' used directly as the filter condition.
# The verify script expresses the same filter as a string comparison with 'true'.
# Marked 'ignore' (see PIG-2593 note below).
'num' => 18,
'ignore' => 1, # PIG-2593: this case is not supported, as instate needs to be declared as boolean
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == 'true';
store b into ':OUTPATH:' using PigStorage;\,
},
{
# Filters on the negation of the untyped field 'instate' ("not instate").
# The verify script expresses the same filter as a string comparison with 'false'.
# Marked 'ignore' (see PIG-2593 note below).
'num' => 19,
'ignore' => 1, # PIG-2593: this case is not supported, as instate needs to be declared as boolean
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by not instate;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == 'false';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 20,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate is null;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 21,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == true;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == 'true';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 22,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == false;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == 'false';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 23,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by instate;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = filter a by instate == 'true';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 24,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by not instate;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = filter a by instate == 'false';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 25,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by instate is null;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = filter a by instate is null;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 26,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by instate == true;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = filter a by instate == 'true';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 27,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by instate == false;
store b into ':OUTPATH:' using PigStorage;\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = filter a by instate == 'false';
store b into ':OUTPATH:' using PigStorage;\,
},
],
},
{
'name' => 'FilterEq',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name == 'alice johnson' and age == 64 and gpa == 3.99;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name > 'fred allen' and age > 40 and gpa > 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name >= 'fred allen' and age >= 40 and gpa >= 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name lt 'fred allen' and age < 40 and gpa < 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name lte 'fred allen' and age <= 40 and gpa <= 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
b = filter a by $0 neq 'fred allen' and $1 != '40' and $2 != '2.50';
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter == for chararray, int and double
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter == for bytearray, long and float
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42f;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter != for chararray, int and double
{
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter != for bytearray, long and float
{
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50f;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter == for explicit casts to chararray, int and double
{
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by (chararray)name == 'fred allen' and (int)age == 61 and (double)gpa == 1.42;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter == for explicit casts to bytearray, long and float
{
'num' => 12,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name == 'fred allen' and (long)age == 61 and (float)gpa == 1.42f;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter != for explicit casts to chararray, int and double
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
b = filter a by (chararray)$0 != 'fred allen' and (int)$1 != 40 and (double)$2 != 2.50;
store b into ':OUTPATH:' using PigStorage;\,
},
# test for filter != for explicit casts to bytearray, long and float
{
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
b = filter a by $0 != 'fred allen' and (long)$1 != 40 and (float)$2 != 2.50f;
store b into ':OUTPATH:' using PigStorage;\,
},
]
},
{
'name' => 'FilterMatches',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = filter a by name matches '^fred.*';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
b = filter a by not $0 matches '^fred.*';
store b into ':OUTPATH:' using PigStorage;\,
},
{
# test for filter on matches for chararray (declared and explicit cast)
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
b = filter a by name matches '^fred.*' and (chararray)registration matches '^dem.*';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
b = filter a by name matches 'f.ed' and (chararray)registration matches 'd.m';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
b = filter a by name matches 'f[^f]ed.*';
store b into ':OUTPATH:' using PigStorage;\,
},
{
'num' => 6,
'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '.*\\\\wan.*';\nstore b into ':OUTPATH:' using PigStorage;",
},
{
'num' => 7,
'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '^e.*\\\\sc.*';\nstore b into ':OUTPATH:' using PigStorage;",
},
{
'num' => 8,
'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches 'ethan white';\nstore b into ':OUTPATH:' using PigStorage;",
},
{
'num' => 9,
'pig' => "a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);\nb = filter a by gpa matches '\\\\d\\\\.45';\nstore b into ':OUTPATH:' using PigStorage;",
},
]
},
{
'name' => 'FilterUdf',
'tests' => [
{
'num' => 1,
'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by (name, age), b by (name, age);
d = filter c by not IsEmpty(a);
e = filter d by not IsEmpty(b);
f = foreach e generate flatten(a), flatten(b);
store f into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age);
f = filter e by COUNT(c)> 0 AND COUNT(d)>0;
store f into ':OUTPATH:';\,
'rc' => 0
},
]
},
# TODO Groups that don't flatten via Agg functions
{
'name' => 'GroupAggFunc',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, COUNT(a.age);
store c into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b generate group, COUNT(a.$1);
store c into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by (name, age);
c = foreach b generate group.name, group.age, COUNT(a.gpa);
store c into ':OUTPATH:';\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a all;
c = foreach b generate COUNT(a.$0);
store c into ':OUTPATH:';\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, SUM(a.age);
store c into ':OUTPATH:';\,
},
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, SUM(a.gpa);
store c into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, AVG(a.age);
store c into ':OUTPATH:';\,
},
{
# Groups students by name and computes AVG(gpa) per group.
# 'floatpostprocess' is listed exactly once, after 'pig', matching the sibling
# tests in this group; a repeated key in a Perl hash constructor silently keeps
# only the last value, so the earlier duplicate was redundant.
# NOTE(review): 'floatpostprocess', 'delimiter' and 'decimals' are presumably
# consumed by the test driver when comparing floating-point output - confirm
# against the harness.
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, AVG(a.gpa);
store c into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
'decimals' => 6,
},
{
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, MIN(a.gpa);
store c into ':OUTPATH:';\,
},
{
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, MAX(a.gpa);
store c into ':OUTPATH:';\,
},
{
'num' => 12,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by (name, age);
c = foreach b generate flatten(group), SUM(a.gpa);
store c into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by (name);
c = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
d = cogroup b by group, c by name;
e = foreach d generate flatten(group), SUM(c.gpa), COUNT(c.name);
store e into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
{
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = group a by (name);
e = foreach b generate COUNT(a.name);
store e into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = group a by (name);
e = foreach b generate COUNT(a.name);
store e into ':OUTPATH:';\,
}
],
},
{
'name' => 'MapPartialAgg',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b generate group, COUNT(a.age);
store c into ':OUTPATH:';\,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
{
#multiquery with group in one sub query
'num' => 2,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
b = filter a by age < 22; store b into ':OUTPATH:.1';
c = group b by age;
d = foreach c generate group, SUM(b.gpa);
store d into ':OUTPATH:.2'; #,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
{
#multi query with two groups on diff columns
'num' => 3,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
g1 = group a by name;
f1 = foreach g1 generate group as name, MAX(a.gpa);
store f1 into ':OUTPATH:.1';
g2 = group a by age;
f2 = foreach g2 generate group as age, AVG(a.gpa);
store f2 into ':OUTPATH:.2'; #,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
{
#multi query with four groups on diff columns, one group key being an expression
'num' => 4,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
g1 = group a by name;
f1 = foreach g1 generate group as name, MAX(a.gpa);
store f1 into ':OUTPATH:.1';
g2 = group a by age%10;
f2 = foreach g2 generate group as age_mod10, AVG(a.gpa);
store f2 into ':OUTPATH:.2';
g3 = group a by age;
f3 = foreach g3 generate group%10, AVG(a.gpa);
store f3 into ':OUTPATH:.3';
g4 = group a by gpa;
f4 = foreach g4 generate group as gpa, COUNT(a);
store f4 into ':OUTPATH:.4';
#,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
{
#aggregation gets more than one tuple for every tuple from load func
'num' => 5,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
b = foreach a generate name, age, gpa, flatten(TOBAG(age,age)) as x;
c = group b by age;
d = foreach c generate group, AVG(b.gpa);
store d into ':OUTPATH:'; #,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
{
#PIG-4707 Streaming and empty input
'num' => 6,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
b = group a by name;
c = foreach b generate flatten(a);
d = stream c through `cat` as (name, age, gpa);
e = filter d by name == 'nonexistent';
SPLIT e into f if gpa > 2, g otherwise;
store f into ':OUTPATH:.1';
store g into ':OUTPATH:.2';
#,
'java_params' => ['-Dpig.exec.mapPartAgg=true']
},
],
},
{
'name' => 'EvalFunc',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate ARITY(name, age, gpa);
store c into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate TOKENIZE(name);
d = foreach c generate flatten($0);
store d into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate org.apache.pig.test.udf.evalfunc.Swap(name, age);
store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
},
{
'num' => 4,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by name lt 'b';
c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
},
{
'num' => 5,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
store b into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
store b into ':OUTPATH:';\,
}
]
},
# TODO DIFF
# TODO User defined grouping function
{
'name' => 'CoGroupFlatten',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = cogroup c by name, d by name;
f = foreach e generate flatten (c), flatten(d);
store f into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by $1 < 20;
d = filter b by $1 < 20;
e = cogroup c by $0, d by $0;
f = foreach e generate flatten (c), flatten(d);
store f into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = cogroup c by (name, age), d by (name, age);
f = foreach e generate flatten (c), flatten(d);
store f into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
d = filter b by age < 20;
e = cogroup a by (name, age) inner, d by (name, age);
f = foreach e generate flatten (a), flatten(d);
store f into ':OUTPATH:';\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
e = cogroup c by (name, age), b by (name, age) inner;
f = foreach e generate flatten (c), flatten(b);
store f into ':OUTPATH:';\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = cogroup a by (name, age) inner, b by (name, age) inner;
f = foreach e generate flatten (a), flatten(b);
store f into ':OUTPATH:';\,
},
{
# Test cogrouping data loaded from two separate loaders. We don't have any data that can join with studenttab that isn't also loaded with PigStorage, so the
# first step is an intermediate load and store using BinStorage.
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
store a into ':OUTPATH:.intermediate' using BinStorage();
b = load ':OUTPATH:.intermediate' using BinStorage() as (name, age, gpa);
c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = cogroup b by (name, age) inner, c by (name, age) inner;
f = foreach e generate flatten (b), flatten(c);
store f into ':OUTPATH:';\,
'notmq' => 1,
},
]
},
{
'name' => 'CoGroup',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = cogroup a by name, b by name;
d = foreach c generate flatten(group), COUNT(a) + COUNT(b);
store d into ':OUTPATH:';\,
},
]
},
{
'name' => 'Join',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by name, d by name;
store e into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by $0, d by $0;
store e into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by (name, age), d by (name, age);
store e into ':OUTPATH:';\,
},
# self join with implicit split
# JIRA PIG-429
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = filter a by $1 > 25;
c = join a by $0, b by $0;
store c into ':OUTPATH:';\,
},
# join with one input having schema and another without
# JIRA PIG-428
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray,age:int, gpa:double);
another = load ':INPATH:/singlefile/studenttab10k';
c = foreach another generate $0, $1+ 10, $2 + 10.0;
d = join a by $0, c by $0;
store d into ':OUTPATH:';\,
},
# self join using fragment replicate join
# no types
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = join a by name, b by name using 'repl';
store c into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = join a by name, b by name ;
store c into ':OUTPATH:';\,
},
# self join using fragment replicate join
# with types and no cast for join key
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
c = join a by name, b by name using 'repl';
store c into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
c = join a by name, b by name ;
store c into ':OUTPATH:';\,
},
# self join using fragment replicate join
# with types and cast for join key
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
c = join a by gpa, b by gpa using 'repl';
store c into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
c = join a by gpa, b by gpa ;
store c into ':OUTPATH:';\,
},
# left outer join
{
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
c = join a by name left outer, b by name;
store c into ':OUTPATH:';\,
},
# right outer join
{
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
c = join a by name right outer, b by name;
store c into ':OUTPATH:';\,
},
# full outer join
{
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
c = join a by name full outer, b by name;
store c into ':OUTPATH:';\,
},
# See PIG-1209: the join package now uses InternalCachedBag, so every tuple on the reduce side in this test will be spilled to disk.
{
'num' => 12,
'java_params' => ['-Dpig.cachedbag.memusage=0'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by name, d by name;
store e into ':OUTPATH:';\,
},
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by instate, d by instate parallel 5;
store e into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
c = filter a by age < 20;
d = filter b by age < 20;
e = join c by instate, d by instate parallel 5;
store e into ':OUTPATH:';\,
}
]
},
{
'name' => 'Foreach',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate *;
store b into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = foreach a generate *;
store b into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, age;
store b into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = foreach a generate $0, $2;
store b into ':OUTPATH:';\,
},
{
# test filter, projection, sort , duplicate elimination
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by age < 20;
c = group b by age;
d = foreach c {
cf = filter b by gpa < 3.0;
cp = cf.gpa;
cd = distinct cp;
co = order cd by $0;
generate group, flatten(co);
}
store d into ':OUTPATH:';\,
},
{
# test flatten for map and scalar
'num' => 6,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate flatten(name) as n, flatten(org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, gpa)) as m;
store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
'verify_pig_script' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate flatten(name) as n, name as m::key, gpa as m::value;
store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
},
{
# test flatten for UDF that returns bag with multiple tuples with multiple columns
'num' => 7,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, flatten(org.apache.pig.test.udf.evalfunc.CreateTupleBag(age, gpa)) as foo;
store b into ':OUTPATH:';\,
},
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age: int, gpa);
c = group a by name;
d = foreach c generate flatten(group), MAX(a.age) + MIN(a.age);
store d into ':OUTPATH:';\,
},
{
# test filter, projection, sort , duplicate elimination
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by age < 20;
c = group b by age;
d = foreach c {
cf = filter b by gpa >= 3.0 and gpa <= 3.5;
cp = cf.gpa;
cd = distinct cp;
co = order cd by $0;
generate group, flatten(co);
}
store d into ':OUTPATH:';\,
},
{
# test filter, projection, sort , duplicate elimination
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by age < 20;
c = group b by age;
d = foreach c {
cf = filter b by (gpa == 4.0 or gpa != 2.0) and name > 'a';
cp = cf.gpa;
cd = distinct cp;
co = order cd by $0;
generate group, flatten(co);
}
store d into ':OUTPATH:';\,
},
{
# test expression aliases inside a nested foreach block, where one
# alias (exp2) is defined in terms of another (exp1)
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = filter a by age < 20;
c = foreach b {
exp1 = age + gpa;
exp2 = exp1 + age;
generate exp1, exp2;
}
store c into ':OUTPATH:';\,
},
{
# test a udf with no args
'num' => 12,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
store b into ':OUTPATH:';\,
},
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = foreach a generate *;
store b into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
b = foreach a generate *;
store b into ':OUTPATH:';\,
}
]
},
{
'name' => 'Order',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name;
c = order b by name;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = foreach a generate $1;
c = order b by $0;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate gpa;
c = order b by gpa;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = order a by *;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' '],
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, age;
c = order b by name, age;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,2'],
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $0;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $1;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2,2'],
},
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $0, $1;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,2'],
},
{
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $1, $0;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2,2', '-k', '1,1'],
},
{
'num' => 10,
'ignore' => 'order by UDF is not supported',
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k';
c = order a by * using org.apache.pig.test.udf.orderby.OrdDesc;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-r'],
},
{
'num' => 11,
'ignore' => 'order by UDF is not supported',
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $0 using org.apache.pig.test.udf.orderby.OrdDesc;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-r', '-k', '1,1'],
},
{
'num' => 12,
'ignore' => 'order by UDF is not supported',
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k';
c = order a by $0, $1 using org.apache.pig.test.udf.orderby.OrdDesc;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-r', '-k', '1,2'],
},
# ALERT All these tests with inner order bys aren't testing the inner
# ordering. We need to develop a sorting tool to do that.
{
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b {c1 = order $1 by $1; generate flatten(c1); };
store c into ':OUTPATH:';\,
},
{
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b {c1 = order $1 by *; generate flatten(c1); };
store c into ':OUTPATH:';\,
},
{
'num' => 15,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };
store c into ':OUTPATH:';\,
},
{
'num' => 16,
'pig' => q\register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b {c1 = order $1 by $1 using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1);};
store c into ':OUTPATH:';\,
},
{
'num' => 17,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = group a by $0;
c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };
store c into ':OUTPATH:';\,
},
{
# test to make sure the weighted range partitioning
# works correctly when a sort key value repeats across
# reduce partitions
'num' => 18,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = order a by $1 parallel 100;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2,2'],
},
{
'num' => 19,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = foreach a generate instate;
c = order b by instate;
store c into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
b = foreach a generate instate;
c = order b by instate;
store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
'num' => 20,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = order a by name ASC, age DESC parallel 9;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2,2nr'],
},
]
},
{
'name' => 'Distinct',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name;
c = distinct b;
store c into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = foreach a generate $1;
c = distinct b;
store c into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate gpa;
c = distinct b;
store c into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = distinct a;
store b into ':OUTPATH:';\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, age;
c = distinct b;
store c into ':OUTPATH:';\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { aa = distinct a.age; generate group, COUNT(aa); }
store c into ':OUTPATH:';\,
}
]
},
{
'name' => 'Cross',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 19 and gpa < 1.0;
d = filter b by age < 19;
e = cross c, d;
store e into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 19 and gpa < 1.0;
d = filter b by age < 19;
e = cross c, d parallel 10;
store e into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\set default_parallel 10;
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 19 and gpa < 1.0;
d = filter b by age < 19;
e = cross c, d;
store e into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 25;
d = filter b by age < 25;
e = cross c, d;
f = filter e by c::age < d::age;
store f into ':OUTPATH:';\,
},
{
# Cross of studenttab10k with the result of a 'group ... all' over the
# distinct voter registrations, followed by a flatten of the grouped bag.
# The script under test runs with 'set default_parallel 2'; the
# verification script is the same query without that set statement.
'num' => 5,
'pig' => q\
set default_parallel 2
a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
b = foreach a generate registration;
c = distinct b;
d = group c all;
e = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
f = cross e, d;
g = foreach f generate $0, $1, $2, flatten($3);
store g into ':OUTPATH:';\,
'verify_pig_script' => q\
a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
b = foreach a generate registration;
c = distinct b;
d = group c all;
e = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
f = cross e, d;
g = foreach f generate $0, $1, $2, flatten($3);
store g into ':OUTPATH:';\,
}
]
},
{
'name' => 'Union',
'tests' => [
{
# Simple store
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = foreach a generate name, age;
d = foreach b generate name, age;
e = union c, d;
store e into ':OUTPATH:';\,
},
{
# Union + Groupby + Combiner
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = group c by name;
e = foreach d generate group, SUM(c.age);
store e into ':OUTPATH:';\,
},
{
# Union + Groupby + Secondary key partitioner
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = group c by name;
d1 = group c by name; -- Two separate groupbys to ensure secondary key partitioner
e = foreach d { f = order c by age, gpa ; g = limit f 1; generate g; };
h = foreach d1 { i = order c by age asc, gpa desc; j = limit i 1; generate j; };
store e into ':OUTPATH:.1';
store h into ':OUTPATH:.2';\,
},
{
# Union + Orderby
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = order c by name PARALLEL 2;
store d into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
# Simple split + Union
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
split a into a1 if age < 50, a2 otherwise;
c = union a1, b;
d = order c by name PARALLEL 2;
store a2 into ':OUTPATH:.1';
store d into ':OUTPATH:.2';\,
},
{
# Union + Join
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = join c by name, d by name PARALLEL 2;
store e into ':OUTPATH:';\,
},
{
# Union + Replicate Join left
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = join c by name, d by name using 'replicated';
store e into ':OUTPATH:';\,
},
{
# Union + Replicate Join right
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = join d by name, c by name using 'replicated';
store e into ':OUTPATH:';\,
},
{
# Union + Skewed Join left
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = join c by name, d by name using 'skewed' PARALLEL 5;
store e into ':OUTPATH:';\,
},
{
# Union + Skewed Join right
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = join d by name, c by name using 'skewed' PARALLEL 5;
store e into ':OUTPATH:';\,
},
{
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
d = foreach a generate name, age;
e = foreach b generate name, age;
f = foreach c generate name, age;
g = union d, e;
h = union f, g;
i = group h by name;
i = foreach i generate group, SUM(h.age);
store i into ':OUTPATH:';\,
},
{
# Union + operators
'num' => 12,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa:double);
c = union a, b;
-- Exercise all expression operators --
d = foreach c generate (name is not NULL? UPPER(name) : 'FNU LNU') as name, (age < 30 ? -1 : age) as age, (gpa is NULL ? 0.0 : ((gpa > 0.5 AND gpa < 1.0) ? 1 : gpa)) as gpa;
e = filter d by (name matches '.*MIKE.*') OR (NOT (gpa + 1.5 > 4));
store e into ':OUTPATH:';\,
},
{
# Union + Groupby + Replicate join
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = group c by name;
e = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
f = join d by group, e by name using 'replicated';
g = foreach f generate group, flatten(c), name, age, registration, contributions;
store g into ':OUTPATH:';\,
},
{
# Group by with Secondary Key + Union
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age, gpa);
c = group a by name;
d = foreach c {
sorted = order a by name,age,gpa;
lmt = limit sorted 1;
generate lmt as c1;
};
e = foreach d generate flatten(c1) as (name:chararray, age, gpa);
f = group b by name;
g = foreach f {
sorted = order b by name,age,gpa;
lmt = limit sorted 1;
generate lmt as f1;
};
h = foreach g generate flatten(f1) as (name:chararray, age, gpa);
i = union e, h;
j = order i by name parallel 1;
store j into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
# Union + Cross
'num' => 15,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa:float);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa:float);
c = filter a by gpa >= 4;
d = cross a, c;
e = union b, d;
store e into ':OUTPATH:';\,
},
{
# Union + Distinct
# The stored alias must be d (the DISTINCT result). Storing c would
# leave d unused, so the distinct step would never be written out or
# verified and the test would only cover the plain union.
'num' => 16,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
c = union a, b;
d = distinct c;
store d into ':OUTPATH:';\,
},
{
# Union + Groupby + FILTER
'num' => 17,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa:float);
b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa:float);
c = group a by name;
d = group b by name;
e = union c, d;
e = foreach e generate $0, $1 as groupbag;
f = foreach e {
g = order $1 by age asc, gpa desc;
h = filter g by (gpa == 0 ? true : false);
generate group, h; };
store f into ':OUTPATH:';\,
}
]
},
{
'name' => 'Bincond',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, (name matches 'yuri.*' ? age - 10 : (int)age);
store b into ':OUTPATH:';\,
},
]
},
{
'name' => 'Glob',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10?' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/st*ttab10k' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/voter*' as (name, age, registration, contributions);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/student???10k' as (name, age, registration, contributions);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studentta[a-z][1-9]0[!m],:INPATH:/singlefile/voter{,null}tab10k' as (name, age);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab[13]0[km]' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab[12]0[a-l]' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 8,
'pig' => q\a = load ':INPATH:/glob/star/*good' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
},
{
'num' => 9,
'pig' => q\a = load ':INPATH:/glob/star/*' as (name, age, gpa);
b = filter a by name == 'nick miller';
store b into ':OUTPATH:';\,
}
]
},
{
'name' => 'Arithmetic',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate age + 1, (int)gpa + 1;
store c into ':OUTPATH:';\,
},
{
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate (double)age + 1.5, gpa + 1.5;
store c into ':OUTPATH:';\,
},
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate age - 30, (int)gpa - 3;
store c into ':OUTPATH:';\,
},
{
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate (double)age - 30.1, gpa - 3.199;
store c into ':OUTPATH:';\,
},
{
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate age * 10, (int)gpa * 2;
store c into ':OUTPATH:';\,
},
{
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate (double)age * 10.1, gpa * 2.752342;
store c into ':OUTPATH:';\,
},
{
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate age / 30, (int)gpa / 3;
store c into ':OUTPATH:';\,
},
{
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate (double)age / 30.323, gpa / 3.22;
store c into ':OUTPATH:';\,
},
{
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate 3 * age + gpa / 9.1 - 2;
store c into ':OUTPATH:';\,
},
{
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
c = foreach a generate 3 * (age + gpa) / (9.1 - 2);
store c into ':OUTPATH:';\,
}
]
},
{
'name' => 'Regression',
'tests' => [
{
'num' => 1459894,
'pig' => q\a = load ':INPATH:/singlefile/reg1459894';
b = group a by $0;
c = foreach b generate group, COUNT(a.$1);
store c into ':OUTPATH:';\,
},
{
'num' => 97,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
e = cogroup a by name, b by name;
f = foreach e generate group, COUNT(a), COUNT(b);
store f into ':OUTPATH:';\,
},
{
'num' => 203,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = group a by name;
c = foreach b generate group, COUNT($1);
store c into ':OUTPATH:';
--This is a really long script to test that when script size exceeds 1k we can still parse it.
--The quick sly fox jumped over the lazy brown dog.
--he quick sly fox jumped over the lazy brown dog.T
--e quick sly fox jumped over the lazy brown dog.Th
-- quick sly fox jumped over the lazy brown dog.The
--quick sly fox jumped over the lazy brown dog.The
--uick sly fox jumped over the lazy brown dog.The q
--ick sly fox jumped over the lazy brown dog.The qu
--ck sly fox jumped over the lazy brown dog.The qui
--k sly fox jumped over the lazy brown dog.The quic
-- sly fox jumped over the lazy brown dog.The quick
--sly fox jumped over the lazy brown dog.The quick
--ly fox jumped over the lazy brown dog.The quick s
--y fox jumped over the lazy brown dog.The quick sl
-- fox jumped over the lazy brown dog.The quick sly
--fox jumped over the lazy brown dog.The quick sly
--ox jumped over the lazy brown dog.The quick sly f
--x jumped over the lazy brown dog.The quick sly fo
-- jumped over the lazy brown dog.The quick sly fox
--jumped over the lazy brown dog.The quick sly fox
--umped over the lazy brown dog.The quick sly fox j
--mped over the lazy brown dog.The quick sly fox ju
--ped over the lazy brown dog.The quick sly fox jum\,
}
]
},
{
'name' => 'Unicode',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/unicode100';
store a into ':OUTPATH:';\,
},
]
},
{
# Parameter-substitution tests: each script loads
# ':INPATH:/singlefile/<param>' where the file name comes from a
# parameter supplied in a different way.
'name' => 'Parameters',
'tests' => [
{
# test %default: fname is given its value inside the script itself
'num' => 1,
'pig' => q\%default fname 'studenttab10k'
a = load ':INPATH:/singlefile/$fname' using PigStorage() as (name, age, gpa);
b = foreach a generate name;
store b into ':OUTPATH:';\,
},
{
# test parameter from command line (-p fname=...)
'num' => 2,
'pig_params' => ['-p', qq(fname='studenttab10k')],
'pig' => q\a = load ':INPATH:/singlefile/$fname' using PigStorage() as (name, age, gpa);
b = foreach a generate name;
store b into ':OUTPATH:';\,
},
{
# test parameter from param file (-m :PARAMPATH:/params_3)
'num' => 3,
'pig_params' => ['-m', ":PARAMPATH:/params_3"],
'pig' => q\a = load ':INPATH:/singlefile/$fname' using PigStorage() as (name, age, gpa);
b = foreach a generate name;
store b into ':OUTPATH:';\,
},
{
# test command: %declare with a backtick shell command; $cmd is then
# used as the file name (presumably replaced by the command's output)
'num' => 4,
'pig' => q\%declare cmd `perl -e "print 'studenttab10k'"`
a = load ':INPATH:/singlefile/$cmd' using PigStorage() as (name, age, gpa);
b = foreach a generate name;
store b into ':OUTPATH:';\,
},
{
# test parameter with a space: $setting carries a whole
# 'set default_parallel 100;' statement. The verification script is
# the same query without the $setting line.
'num' => 5,
'pig_params' => ['-p', qq(setting='set default_parallel 100;'),'-p',qq(fname='studenttab10k')],
'pig' => q\a = load ':INPATH:/singlefile/$fname' using PigStorage() as (name, age, gpa);
$setting
b = foreach a generate name;
store b into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/$fname' using PigStorage() as (name, age, gpa);
b = foreach a generate name;
store b into ':OUTPATH:';\,
},
]
},
{
# Tests mixing int, float (f-suffixed), long (L-suffixed) and double
# literal constants in arithmetic over typed columns.
'name' => 'Types_Constants',
'tests' => [
{
# constants: int + int + float + long literals added to an int column,
# and an int literal added to a double column
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate age + 1 + 0.2f + 253645L, gpa+1;
store b into ':OUTPATH:';\,
},
{
# constants: same expressions as test 1, with unary minus applied
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1);
store b into ':OUTPATH:';\,
},
{
# test precision for doubles is at least 15 digits
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate 0.123456789123456+0.123456789123456;
store b into ':OUTPATH:';\,
},
]
},
{
'name' => 'Types_Cast',
'tests' => [
{
# NULL and cast
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is null? 0 :norm_gpa);
store c into ':OUTPATH:';\,
# 'expected_err_regex' => "Encountered Warning DIVIDE_BY_ZERO 2387 time.*",
# Driver does not currently support both 'sql' and 'expected_...' verification directives.
},
{
# Not NULL and cast
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is not null? norm_gpa: 0);
store c into ':OUTPATH:';\,
},
# boolean cast
{
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
b = foreach a generate instate, true, false;
store b into ':OUTPATH:';\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
b = foreach a generate instate, 'true', 'false';
store b into ':OUTPATH:';\,
},
]
},
{
'name' => 'Types_ArithmeticCast',
'tests' => [
{
# arithmetic operators and SIZE for int, double and size and concat operators for chararrays
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate age, gpa, age % 25, age + 25, age - 25, age/2, age * 2, SIZE(age), gpa + 10.1, gpa - 1.1 , gpa / 1.2, gpa * 2.5, SIZE(gpa), SIZE(name), CONCAT(name, 'test');
store b into ':OUTPATH:';\,
},
{
# arithmetic operators and SIZE for long and float; SIZE and CONCAT for bytearrays
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = foreach a generate age, gpa, age % 2L, age + 2500000000L, age - 2500000000L, age/2L, age * 250000000L, SIZE(age), gpa + 10.1f, gpa - 1.1f , gpa / 1.2f, gpa * 2.6f, SIZE(gpa), SIZE(name), CONCAT(name, name);
store b into ':OUTPATH:';\,
},
{
# equality and implicit cast
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
b = filter a by age == '25' and gpa < 3;
store b into ':OUTPATH:';\,
},
{
# Will need to be tested against the previous version of Pig,
# because Pig's count currently includes nulls, which affects
# avg.
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = group a ALL;
c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age), MIN(a.name), MAX(a.name), SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa);
store c into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
{
# sum, min, max, avg for long and float (declared)
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = group a ALL;
c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age), SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa);
store c into ':OUTPATH:';\,
},
{
# Explicit casts - arithmetic operators and SIZE for int and double; SIZE and CONCAT for chararrays
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
b = foreach a generate (int)age % 25, (int)age + 25, (int)age - 25, (int)age/2, (int)age * 2, SIZE((int)age), (double)gpa + 10.1, (double)gpa - 1.1 , (double)gpa / 1.2, (double)gpa * 2.5, SIZE((double)gpa), SIZE((chararray)name), CONCAT((chararray)name, 'test');
store b into ':OUTPATH:';\,
},
{
# Explicit casts - arithmetic operators and SIZE for long, float
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
b = foreach a generate (long)age, (long)age % 2L, (long)age + 2500000000L, (long)age - 2500000000L, (long)age/2L, (long)age * 250000000L, SIZE((long)age), (float)gpa + 10.1f, (float)gpa - 1.1f , (float)gpa / 1.2f, (float)gpa * 2.6f, SIZE((float)gpa);
store b into ':OUTPATH:';\,
},
]
},
{
'name' => 'Types_Filter',
'tests' => [
{
# Filter is null for chararray and double and is not null for int
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = filter a by name is null and age is not null and gpa is null;
c = group b ALL;
d = foreach c generate COUNT(b);
store d into ':OUTPATH:';\,
},
{
# Filter is not null for chararray and double and is null for int
'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = filter a by name is not null and age is null and gpa is not null;
c = group b ALL;
d = foreach c generate COUNT(b);
store d into ':OUTPATH:';\,
},
{
# Filter is null for bytearray and float and is not null for long
'num' => 3,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = filter a by name is null and age is not null and gpa is null;
c = group b ALL;
d = foreach c generate COUNT(b);
store d into ':OUTPATH:';\,
},
{
# Filter is not null for bytearray and float and is null for long
'num' => 4,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = filter a by name is not null and age is null and gpa is not null;
c = group b ALL;
d = foreach c generate COUNT(b);
store d into ':OUTPATH:';\,
},
]
},
{
'name' => 'Types_Order',
'tests' => [
{
# test that sorting is based on the type for chararray, int and double
'num' => 1,
'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by name, age, gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'],
},
{
# test that sorting descending is based on the type for chararray, int and double
'num' => 2,
'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by name desc, age desc, gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
},
{
# test that sorting is based on the type for bytearray, long and float
'num' => 3,
'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = order a by name, age, gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'],
},
{
# test that sorting descending is based on the type for bytearray, long and float
'num' => 4,
'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = order a by name desc, age desc, gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
},
{
# order by string
'num' => 5,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by name;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1'],
},
{
# order by string desc
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by name desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r'],
},
{
# order by int
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by age;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2n,2n'],
},
{
# order by int desc
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by age desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2nr,2nr'],
},
{
# order by long
'num' => 9,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long, gpa:double);
b = order a by age;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2n,2n'],
},
{
# order by long desc
'num' => 10,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long, gpa:double);
b = order a by age desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '2nr,2nr'],
},
{
# order by float
'num' => 11,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
b = order a by gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '3n'],
},
{
# order by float desc
'num' => 12,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
b = order a by gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '3nr'],
},
{
# order by double
'num' => 13,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '3n'],
},
{
# order by double desc
'num' => 14,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '3nr'],
},
{
# order by *
'num' => 15,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by *;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'],
},
{
# order by * desc
'num' => 16,
'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = order a by * desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
},
]
},
{
'name' => 'Types_CoGroup',
'tests' => [
{
'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:double);
c = filter a by age < 20;
d = filter b by age < 20;
e = cogroup c by name