#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

###############################################################################
# Nightly tests for Pig.
#
#

#PigSetup::setup();

# Test configuration for the "BigData" groups of the nightly Pig tests.
#
# NOTE: $cfg is deliberately assigned without `my`/`our`; nothing in this file
# declares it, so the variable has to be supplied by whatever loads the file.
# Presumably the test harness evaluates this file and reads $cfg back --
# confirm before adding a declaration or `use strict` here.
#
# Layout: top-level 'driver' and 'execonly' settings plus 'groups', a list of
# named groups. Each group has 'tests', a list of hashes holding a test number
# ('num'), the Pig script to run ('pig') and optional per-test settings.
#
# The Pig scripts are quoted with q\...\ so that the single quotes, back-ticks
# and `$n` positional references inside them need no escaping. Continuation
# lines of a script start in column 0 so that no indentation leaks into the
# script text.
#
# NOTE(review): field separators are spelled "\t". Every script stores its
# result without a `using` clause, i.e. with Pig's default storage, which is
# tab-separated; the separator originally appeared here as a bare whitespace
# literal that is indistinguishable from a space -- confirm TAB is intended.
$cfg = {
    'driver'   => 'Pig',
    'execonly' => 'mapred,tez',

    'groups' => [
        {
            'name'  => 'BigData_Checkin',
            'tests' => [
                {
                    # Filters both inputs on age, cogroups them on (name, age),
                    # flattens, sums contributions per registration and orders
                    # the result on ($1, $0).
                    # 'floatpostprocess' / 'delimiter' are consumed by the
                    # harness (presumably to normalise float columns in the
                    # output -- TODO confirm).
                    'num'              => 1,
                    'floatpostprocess' => 1,
                    'java_params'      => ['-Dpig.tez.auto.parallelism=false'],
                    'delimiter'        => "\t",
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < '50';
d = filter b by age < '50';
e = cogroup c by (name, age), d by (name, age) parallel 20;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration parallel 20;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1, $0 parallel 20;
store i into ':OUTPATH:';\,
                },
                {
                    # Unions all voter names with the student names matching
                    # '.*allen$' and removes duplicates.
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = foreach a generate name;
d = foreach b generate name;
e = filter c by name matches '.*allen$';
f = union d, e;
g = distinct f parallel 20;
store g into ':OUTPATH:';\,
                },
            ],
        },
        {
            'name'  => 'BigData_Group',
            'tests' => [
                {
                    # Two-level grouping: average gpa per (name, age), keep
                    # averages above 3.0, then average age per name.
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
a1 = filter a by age < '50';
b = group a1 by (name, age);
c = foreach b generate group as g, AVG(a1.gpa);
d = filter c by $1 > 3.0;
d1 = foreach d generate g.$0 as name, g.$1 as age, $1 as gpa;
e = group d1 by name;
f = foreach e generate group, AVG(d1.age);
store f into ':OUTPATH:';\,
                },
                {
                    # GROUP ALL followed by a COUNT over the first column.
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = group a all parallel 20;
c = foreach b generate COUNT(a.$0);
store c into ':OUTPATH:';\,
                },
                {
                    # Count per name, run with pig.exec.mapPartAgg switched on.
                    'num'         => 3,
                    'java_params' => ['-Dpig.exec.mapPartAgg=true'],
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = group a by name parallel 20;
c = foreach b generate group, COUNT($1);
store c into ':OUTPATH:';\,
                },
            ],
        },
        {
            'name'  => 'BigData_Streaming',
            'tests' => [
                {
                    # Streams the input through PigStreaming.pl, which is
                    # shipped from :SCRIPTHOMEPATH:. The script text begins
                    # with a newline because the opening q\ is followed by a
                    # line break.
                    'num' => 1,
                    'pig' => q\
define cmd `perl PigStreaming.pl` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') stderr('CMD' limit 3);
a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = stream a through cmd as (n, a, g);
c = foreach b generate n, a;
store c into ':OUTPATH:';\,
                },
            ],
        },
        {
            'name'  => 'BigData_Order',
            'tests' => [
                {
                    # Order by name. 'sortArgs' is presumably the argument
                    # list for sort(1) used by the harness when checking the
                    # output order -- TODO confirm.
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
b = order a by name parallel 20;
store b into ':OUTPATH:';\,
                    'sortArgs' => ['-t', "\t", '-k', '1,1'],
                },
                {
                    # Typed schema; order by name ascending, age descending
                    # (hence the reverse-numeric second sort key).
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:double);
b = order a by name, age desc parallel 20;
store b into ':OUTPATH:';\,
                    'sortArgs' => ['-t', "\t", '-k', '1,1', '-k', '2nr,2nr'],
                },
                {
                    # Filter, count per name, order by name and keep the
                    # first 10 rows.
                    'num' => 3,
                    'pig' => q\A = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa);
B = filter A by age > 20;
C = group B by name;
D = foreach C generate group, COUNT(B) PARALLEL 16;
E = order D by $0 PARALLEL 16;
F = limit E 10;
store F into ':OUTPATH:';\,
                    'sortArgs' => ['-t', "\t", '-k', '1,1'],
                },
            ],
        },
    ],
};