#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
# Nightly tests for pig.
#
#
#PigSetup::setup();
# Test matrix for the large-input ("BigData") Pig nightly groups.
#
# $cfg is assigned as a package global (no `my`); nothing visible in this file
# reads it, so it is presumably consumed by whatever loads this file --
# confirm before changing its scope.
#
# Layout: top-level settings, then a list of groups; each group has a name and
# a list of tests; each test has a number, a Pig Latin script, and optional
# per-test settings (java_params, sortArgs, floatpostprocess, delimiter).
# The :INPATH:, :OUTPATH: and :SCRIPTHOMEPATH: tokens inside the scripts look
# like placeholders filled in by the loader -- TODO confirm.
#
# The script bodies are q\...\ literals: every character between the
# backslashes, including newlines and leading columns, is part of the script.
# Do not indent or re-wrap them.
$cfg = {
    driver   => 'Pig',
    execonly => 'mapred,tez',
    groups   => [

        # Multi-operator pipelines over the student and voter inputs.
        {
            name  => 'BigData_Checkin',
            tests => [
                {
                    # Filter both inputs on age, cogroup on (name, age),
                    # flatten, then sum contributions per registration and
                    # order the totals.
                    num              => 1,
                    floatpostprocess => 1,
                    java_params      => ['-Dpig.tez.auto.parallelism=false'],
                    # NOTE(review): the separator appears as a single space
                    # here; confirm that a tab was not intended.
                    delimiter        => ' ',
                    pig              => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < '50';
d = filter b by age < '50';
e = cogroup c by (name, age), d by (name, age) parallel 20;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration parallel 20;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1, $0 parallel 20;
store i into ':OUTPATH:';\,
                },
                {
                    # Project the name column from both inputs, keep student
                    # names matching '.*allen$', union with the voter names
                    # and de-duplicate.
                    num => 2,
                    pig => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = foreach a generate name;
d = foreach b generate name;
e = filter c by name matches '.*allen$';
f = union d, e;
g = distinct f parallel 20;
store g into ':OUTPATH:';\,
                },
            ],
        },

        # Grouping and aggregation.
        {
            name  => 'BigData_Group',
            tests => [
                {
                    # Average gpa per (name, age), keep averages above 3.0,
                    # then regroup by name and average the ages.
                    num => 1,
                    pig => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
a1 = filter a by age < '50';
b = group a1 by (name, age);
c = foreach b generate group as g, AVG(a1.gpa);
d = filter c by $1 > 3.0;
d1 = foreach d generate g.$0 as name, g.$1 as age, $1 as gpa;
e = group d1 by name;
f = foreach e generate group, AVG(d1.age);
store f into ':OUTPATH:';\,
                },
                {
                    # GROUP ALL followed by a COUNT over the first column.
                    num => 2,
                    pig => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = group a all parallel 20;
c = foreach b generate COUNT(a.$0);
store c into ':OUTPATH:';\,
                },
                {
                    # Count rows per name, run with pig.exec.mapPartAgg=true.
                    num         => 3,
                    java_params => ['-Dpig.exec.mapPartAgg=true'],
                    pig         => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = group a by name parallel 20;
c = foreach b generate group, COUNT($1);
store c into ':OUTPATH:';\,
                },
            ],
        },

        # Streaming through an external command.
        {
            name  => 'BigData_Streaming',
            tests => [
                {
                    # Ship PigStreaming.pl, stream the input through it and
                    # keep the first two output fields. This script begins
                    # with a newline right after the opening delimiter.
                    num => 1,
                    pig => q\
define cmd `perl PigStreaming.pl` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') stderr('CMD' limit 3);
a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = stream a through cmd as (n, a, g);
c = foreach b generate n, a;
store c into ':OUTPATH:';\,
                },
            ],
        },

        # ORDER BY variants; each test also carries a sortArgs list whose
        # keys mirror the script's ORDER BY columns.
        {
            name  => 'BigData_Order',
            tests => [
                {
                    # Order by name.
                    num      => 1,
                    pig      => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = order a by name parallel 20;
store b into ':OUTPATH:';\,
                    sortArgs => ['-t', ' ', '-k', '1,1'],
                },
                {
                    # Typed schema; order by name ascending, age descending.
                    num      => 2,
                    pig      => q\a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:double);
b = order a by name, age desc parallel 20;
store b into ':OUTPATH:';\,
                    sortArgs => ['-t', ' ', '-k', '1,1', '-k', '2nr,2nr'],
                },
                {
                    # Filter, count per name, order by the group key and keep
                    # the first 10 rows.
                    num      => 3,
                    pig      => q\A = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
B = filter A by age > 20;
C = group B by name;
D = foreach C generate group, COUNT(B) PARALLEL 16;
E = order D by $0 PARALLEL 16;
F = limit E 10;
store F into ':OUTPATH:';\,
                    sortArgs => ['-t', ' ', '-k', '1,1'],
                },
            ],
        },
    ],
};