#!/usr/bin/env perl

############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

###############################################################################
# Nightly tests for pig.
#
#

#PigSetup::setup();

#my $me = `whoami`;
#chomp $me;

# Configuration for the Pig command-line nightly tests.
#
# Layout of the structure assigned to $cfg:
#   driver, nummachines - top-level settings
#   groups              - list of test groups; each group has a 'name', a
#                         list of 'tests', and optional group-wide settings
#                         ('floatpostprocess', 'delimiter', 'execonly')
#   tests               - each test has a 'num', a 'pig' script and either an
#                         'expected_out_regex' or an 'expected_err_regex';
#                         'java_params' is optional
#
# Pig scripts are written with q\...\ : a non-interpolating literal delimited
# by backslashes.  Sigils such as $1 / $3 inside a script are therefore taken
# literally, and a script must never contain a backslash itself.  Script text
# is kept at column 0 because leading whitespace would become part of the
# script.
#
# NOTE(review): $cfg is assigned without `my`; presumably the harness
# evaluates this file and reads the package variable -- confirm before
# adding `my` or `use strict` here.
# NOTE(review): tokens such as :INPATH:, :OUTPATH:, :FUNCPATH: and
# :Warning_1_err: look like placeholders substituted by the harness --
# confirm against the driver.
$cfg = {
    'driver'      => 'Pig',
    'nummachines' => 5,

    'groups' => [
        {
            'name'             => 'Describe_cmdline',
            'floatpostprocess' => 0,
            'delimiter'        => ' ',
            'tests'            => [
                #JIRA[PIG-372]
                #JIRA[PIG-374]
                #JIRA[PIG-384]
                {
                    # Untyped schema: every field is reported as bytearray.
                    'num' => 1,
                    'pig' => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
describe A;\,
                    'expected_out_regex' => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                },
                #JIRA[PIG-19], Commented out until fixed.
                # {
                #     'num' => 2,
                #     'pig' => q\
#A=load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
#describe A;\,
                #     'expected_out_regex' => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                # },
                #JIRA[PIG-373]
                {
                    # Explicitly typed schema.
                    'num' => 3,
                    'pig' => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
describe A;\,
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: double}",
                },
                #********************************************************
                #QUESTION: S/B SQL VERIFIER for DUMP statement?
                #********************************************************
                # #JIRA[PIG-373]
                # {
                #     'num' => 4,
                #     'java_params' => ['-Dopt.fetch=false'],
                #     'pig' => q\
#A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
#describe A;
#A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
#dump A;\,
                #
                #     'sql' => "select name, age, gpa from studenttab10k;",
                # },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    # Map-typed field; "\\[\\]" matches the literal "[]".
                    'num' => 5,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;\,
                    'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}",
                },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    'num' => 6,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe A;
describe B;\,
                    # Expect
                    #   A: {m: map[],x: bytearray,y: bytearray}
                    #   B: {m: map[]}
                    'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },
                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    # Same as test 6, with the describes interleaved with the definitions.
                    'num' => 7,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;
B= foreach A generate m;
describe B;\,
                    # Expect
                    #   A: {m: map[],x: bytearray,y: bytearray}
                    #   B: {m: map[]}
                    'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },
                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    # Same as test 6, describing B before A.
                    'num' => 8,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe B;
describe A;\,
                    # Expect (B first, then A)
                    #   B: {m: map[]}
                    #   A: {m: map[],x: bytearray,y: bytearray}
                    'expected_out_regex' => "B: {m: map\\[\\]}\nA: {m: map\\[\\],x: bytearray,y: bytearray}",
                },
                {
                    # Typed schema that includes a boolean column.
                    'num' => 14,
                    'pig' => q\A = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
describe A;\,
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: double,instate: boolean}",
                },

                #JIRA[PIG-379]
                {
                    # Each alias is described immediately after it is defined.
                    'num' => 9,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
describe A;
B= foreach A generate name, age;
describe B;
C= filter B by age > 30;
describe C;
D= group C by name;
describe D;\,
                    # EXPECT
                    #   A: {name: chararray,age: int,gpa: float}
                    #   B: {name: chararray,age: int}
                    #   C: {name: chararray,age: int}
                    #   D: {group: chararray,C: {(name: chararray,age: int)}}
                    'expected_out_regex' =>
                        "A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}",

                },
                {
                    # All aliases are defined first, then described in definition order.
                    'num' => 10,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe A;
describe B;
describe C;
describe D;\,
                    # EXPECT
                    #   A: {name: chararray,age: int,gpa: float}
                    #   B: {name: chararray,age: int}
                    #   C: {name: chararray,age: int}
                    #   D: {group: chararray,C: {(name: chararray,age: int)}}
                    # NOTE(review): unlike tests 9 and 11, this pattern ends
                    # with a newline -- confirm that is intentional.
                    'expected_out_regex' =>
                        "A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}\n",

                },
                {
                    # All aliases are defined first, then described in reverse order.
                    'num' => 11,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe D;
describe C;
describe B;
describe A;\,
                    # EXPECT (reverse order)
                    #   D: {group: chararray,C: {(name: chararray,age: int)}}
                    #   C: {name: chararray,age: int}
                    #   B: {name: chararray,age: int}
                    #   A: {name: chararray,age: int,gpa: float}
                    'expected_out_regex' =>
                        "D: {group: chararray,C: {\\(name: chararray,age: int\\)}}\nC: {name: chararray,age: int}\nB: {name: chararray,age: int}\nA: {name: chararray,age: int,gpa: float}",

                },
                {
                    # Alias X is never defined in the script, so this test
                    # checks the error stream instead of any schema output.
                    'num' => 12,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe X;
describe D;
describe C;
describe B;
describe A;\,
                    'expected_err_regex' => "ERROR 1003: Unable to find an operator for alias X",

                },
                {
                    # Describe of a flattened COGROUP result; field names are
                    # prefixed with the alias they came from (A:: / B::).
                    'num' => 13,
                    'pig' => q\
A = LOAD ':INPATH:/singlefile/studenttab10k' AS (name: chararray, age: int, gpa: float);
B = LOAD 'voter_data' AS (name: chararray, age: int, registration: chararray, contributions: float);
C = COGROUP A BY name, B BY name;
D = FOREACH C GENERATE group, flatten((not IsEmpty(A) ? A : (bag{tuple(chararray, int, float)}){(null, null, null)})), flatten((not IsEmpty(B) ? B : (bag{tuple(chararray, int, chararray, float)}){(null,null,null, null)}));
describe D;\,
                    # EXPECT
                    #   D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}
                    'expected_out_regex' => "D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}",

                },
            ],
        },
        {
            'name'             => 'Unicode_cmdline',
            'floatpostprocess' => 0,
            'delimiter'        => ' ',
            'tests'            => [

                {
                    # Dump of a file containing unicode names.
                    'num'         => 1,
                    'java_params' => ['-Dopt.fetch=false'],
                    'pig'         => q\
A = load ':INPATH:/singlefile/unicode100' as (name:chararray);
dump A;\,
                    'expected_out_regex' => ":Unicode_cmdline_1_output:",

                },
            ],
        },

        {
            'name'             => 'Warning',
            'floatpostprocess' => 0,
            'execonly'         => 'mapred,tez', # Warnings use counters, which don't work in local mode
            'delimiter'        => ' ',
            'tests'            => [

                {
                    #Checking divide by zero warning
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is null? 0 :norm_gpa);
store c into ':OUTPATH:';\,
                    'expected_err_regex' => ":Warning_1_err:",
                },
                {
                    #Checking field discarded warning
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age:int, gpa:double);
b = foreach a generate (int)name;
store b into ':OUTPATH:';\,
                    'expected_err_regex' => "Encountered Warning FIELD_DISCARDED_TYPE_CONVERSION_FAILED 10000 time.*",
                },
                {
                    #Checking type cast warnings
                    # The two warnings may be reported in either order, hence
                    # the two alternatives in the pattern.
                    'num' => 3,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age, gpa);
b = foreach a generate age + 1, gpa + 0.1f;
describe b;\,
                    'expected_err_regex' => "(Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*)|(Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*)",
                },
                {
                    #Checking udf warnings
                    'num' => 4,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age: int, gpa: float);
b = foreach a generate org.apache.pig.test.udf.evalfunc.TestWarningFunc(name, age, gpa);
store b into ':OUTPATH:';\,
                    'expected_err_regex' => ":Warning_4_err:",
                },
                {
                    #Checking non existent field warnings
                    # "$3" is literal here because q\...\ does not interpolate.
                    'num' => 5,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k';
b = foreach a generate $3;
store b into ':OUTPATH:';\,
                    'expected_err_regex' => "Encountered Warning ACCESSING_NON_EXISTENT_FIELD 10000 time.*",
                },
            ],
        },
        {

            # Scenarios referenced by the comments on the tests in this group:
            #  1 Test that a nested foreach gives instant feedback: after the user issues the foreach statement in Grunt
            #  2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
            #  3 Describe single Alias resulting from a nested Foreach
            #  4 Describe multiple Alias resulting from a nested Foreach
            #  5 Describe resulting from a nested Foreach that contains a positional parameter
            #  6 Describe for child Alias resulting from a nested Foreach where the child alias had multiple assignments. The expected behavior is that the last assignment will determine the result of the describe statement.
            #  7 Describe for an Alias resulting from a nested Foreach where the projection for the nested alias is empty
            #  8 Describe within a foreach statement
            #  9 Describe for alias with complex data types
            # 10 Describe that references an alias from an AS clause

            'name'  => 'NestedDescribe',
            'tests' => [
                {
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 3 Describe single Alias resulting from a nested Foreach
                    'num' => 1,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; generate COUNT(D), group;}
describe C::D;
\,
                    'expected_out_regex' => "D: {age: bytearray}",

                },
                {
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 7 Describe for an Alias resulting from a nested Foreach where the projection for the nested alias is empty
                    'num' => 2,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; E= filter D by age > 1000; generate COUNT(E), group;}
describe C;
\,
                    'expected_out_regex' => "C: {long,group: bytearray}",


                },
                {
                    # 1 Test that a nested foreach gives instant feedback: after the user issues the foreach statement in Grunt
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 3 Describe single Alias resulting from a nested Foreach
                    # 4 Describe multiple Alias resulting from a nested Foreach
                    # 5 Describe resulting from a nested Foreach that contains a positional parameter
                    # "$1" is literal here because q\...\ does not interpolate.
                    'num' => 3,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.$1; generate COUNT(D), group;}
describe C::D;
\,
                    'expected_out_regex' => "D: {age: bytearray}",


                },
            ],

        },
    ],
};