# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

###############################################################################
# Nightly macro tests for pig: macro definition and inlining, scope, schema,
# import, and error handling.
#
# Author:  Alan F. Gates (gates@)
# $Header:$
#

#use Yahoo::Miners::Test::PigSetup;

#PigSetup::setup();

#my $me = `whoami`;
#chomp $me;

# Test-suite definition.  $cfg is assigned without 'my', so it is a package
# global.
# NOTE(review): presumably the test harness loads this file and reads $cfg,
# with 'driver' selecting the Pig-specific driver -- confirm against the harness.
$cfg = {
  'driver' => 'Pig',

  # Each entry in 'groups' is a hash with a 'name' and a list of 'tests';
  # tests are numbered with 'num' within their group.
  'groups' => [
    {
      # Group: macros defined inline in the script and expanded at their call
      # sites.  Each test pairs a 'pig' script that uses a macro with a
      # macro-free 'verify_pig_script' that spells out the same data flow by hand.
      # NOTE(review): presumably the harness runs both scripts and compares their
      # output, and substitutes the :INPATH: / :OUTPATH: placeholders -- confirm
      # against the Pig test driver.
      'name' => 'Macro_DefinitionAndInline',
      'tests' => [
        {
          # Simple macro: no arguments, returns void, does its own load and store.
          'num' => 1,
          'pig' => q#define simple_macro() returns void {
                         a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                         b = foreach a generate age, name;
                         store b into ':OUTPATH:';
                     }

                     simple_macro();#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = foreach a generate age, name;
                                   store b into ':OUTPATH:';#,
        },{
          # Input argument (the file to load), no return value.
          'num' => 2,
          'pig' => q#define simple_macro(loadfile) returns void {
                         a = load '$loadfile' as (name, age, gpa);
                         b = foreach a generate age, name;
                         store b into ':OUTPATH:';
                     }

                     simple_macro(':INPATH:/singlefile/studenttab10k');#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = foreach a generate age, name;
                                   store b into ':OUTPATH:';#,
        },{
          # Input argument plus a return value; the caller stores the result.
          'num' => 3,
          'pig' => q#define simple_macro(loadfile) returns b {
                         a = load '$loadfile' as (name, age, gpa);
                         $b = foreach a generate age, name;
                     }

                     x = simple_macro(':INPATH:/singlefile/studenttab10k');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = foreach a generate age, name;
                                   store b into ':OUTPATH:';#,
        },
        {
          # A relation and two scalar arguments (a double and an int, passed as
          # quoted strings) used in a filter; returns a value.
          'num' => 4,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                         $c = foreach b generate age, name;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        },
        {
          # Macro with multiple inputs and multiple return values; the macro
          # stores nothing itself -- the caller stores each returned alias.
          # Query based on FilterEq from nightly.conf
          'num' => 5, 
          'pig' => q\define test (in1, in2) returns r1, r2 {
                          a = load '$in1' using PigStorage() as (name, age, gpa);
                          $r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
                          b = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
                          $r2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
                      }

                      x1, x2 = test(':INPATH:/singlefile/studenttab10k', ':INPATH:/singlefile/votertab10k');

                      store x1 into ':OUTPATH:.1' using PigStorage;
                      store x2 into ':OUTPATH:.2' using PigStorage;\,


          'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
                                   store a1 into ':OUTPATH:.1' using PigStorage;
                                   b = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
                                   b2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
                                   store b2 into ':OUTPATH:.2' using PigStorage;\,
          'floatpostprocess' => 1,
          'delimiter' => '  ',
        },
        {
          # Positional field references inside a macro: $2 and $1 in the filter
          # are not macro parameters; the verify script spells the same
          # condition with the field names gpa and age.
          'num' => 6,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
                         $c = foreach b generate age, name;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        },
        {
          # Nested macros: group_it invokes sum_it and forwards its own
          # parameters to it.
          'num' => 7,
          'pig' => q\define sum_it(in, relation, scol) returns d {
                          $d = foreach $in generate group, SUM($relation.$scol);
                      }

                      define group_it(in_relation, group_key, sum_col) returns c {
                          b = group $in_relation by $group_key ;
                          $c = sum_it(b, $in_relation, $sum_col);
                      }

                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = group_it(a, 'name', 'age');
                      store x into ':OUTPATH:';\,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = group a by name;
                                   c = foreach b generate group, SUM(a.age);
                                   store c into ':OUTPATH:';#,
        },
        {
          # Single macro definition invoked multiple times with different
          # arguments, each result stored to its own output.
          # NOTE(review): alias z is loaded but never used -- the second
          # invocation passes a again.  The verify script reads the same file
          # through d, so the results agree; confirm whether z was meant to be
          # the argument.
          'num' => 8,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                         $c = foreach b generate age, name;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     store x into ':OUTPATH:.1';

                     z = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     y = simple_macro(a, '2.0', '50');
                     store y into ':OUTPATH:.2';#,

          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:.1';

                                   d = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   e = filter d by gpa >= 2.0 and age <= 50;
                                   f = foreach e generate age, name;
                                   store f into ':OUTPATH:.2';#,
        },
        {
          # Macro argument used as a function argument (the PigStorage separator).
          'num' => 9,
          'pig' => q#define simple_macro(loadfile, sep) returns b {
                         a = load '$loadfile' using PigStorage('$sep') as (name, age, gpa);
                         $b = foreach a generate age, name;
                     }

                     x = simple_macro(':INPATH:/singlefile/studentcolon10k', ':');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
                                   b = foreach a generate age, name;
                                   store b into ':OUTPATH:';#,
        },
        {
          # Multiple return values that both derive from the same loaded
          # relation (a split in the data flow).
          'num' => 10, 
          'pig' => q\define test (in1) returns r1, r2 {
                          a = load '$in1' using PigStorage() as (name, age, gpa);
                          $r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
                          $r2 = filter a by name > 'fred';
                      }

                      x1, x2 = test(':INPATH:/singlefile/studenttab10k');

                      store x1 into ':OUTPATH:.1' using PigStorage;
                      store x2 into ':OUTPATH:.2' using PigStorage;\,


          'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
                                   store a1 into ':OUTPATH:.1' using PigStorage;
                                   a2 = filter a by name > 'fred';
                                   store a2 into ':OUTPATH:.2' using PigStorage;\,
          'floatpostprocess' => 1,
          'delimiter' => '  ',
        },
        {
          # Parameter substitution at the top level: $loadfile is used outside
          # any macro and is supplied through 'pig_params' (-p loadfile=...).
          'num' => 11,
          'pig_params' => ['-p', qq(loadfile='singlefile/studenttab10k')],
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                         $c = foreach b generate age, name;
                     }

                     a = load ':INPATH:/$loadfile' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        }
      ]
    },
    {
      # Group: alias and parameter names that appear both inside a macro and
      # outside it (or in a nested macro).  Each 'verify_pig_script' is the
      # macro-free equivalent of the 'pig' script.
      'name' => 'Macro_Scope',
      'tests' => [
        {
          # Re-use of a name in macro and global scope: the macro returns
          # alias b and the calling script defines its own b as well.
          'num' => 1,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns b {
                         $b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     b = foreach x generate age, name;
                     store b into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        },
        {
          # Name collision between parent and child macro: both declare the
          # parameters in_relation and sum_col, both return c, and both use a
          # local alias b.
          'num' => 2,
          'pig' => q\define sum_it(in_relation, relation, sum_col) returns c {
                          b = foreach $in_relation generate group, SUM($relation.$sum_col);
                          $c = order b by $1;
                      }

                      define group_it(in_relation, group_key, sum_col) returns c {
                          b = group $in_relation by $group_key ;
                          $c = sum_it(b, $in_relation, $sum_col);
                      }

                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = group_it(a, name, age);
                      store x into ':OUTPATH:';\,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = group a by name;
                                   c = foreach b generate group, SUM(a.age);
                                   store c into ':OUTPATH:';#,
        },
        {
          # Name collision between the macro's return alias (c) and the alias
          # the main script assigns the macro result to (also c).
          'num' => 3,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                         $c = foreach b generate age, name;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     c = simple_macro(a, '3.0', '40');
                     store c into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        } 
      ]
    },
    {
      # Group: schema propagation through macro expansion.  Each script ends
      # with 'describe b' and the printed schema is matched against
      # 'expected_out_regex'.
      # NOTE(review): 'rc' => 0 is presumably the expected exit status --
      # confirm against the Pig test driver.
      #
      # The patterns are single-quoted on purpose so that the backslashes reach
      # the regex engine.  Inside double quotes "\{" collapses to a bare "{",
      # which is an unrecognized string escape under warnings and leaves an
      # unescaped left brace in the pattern (deprecated, then rejected in some
      # positions, by newer perls).
      'name' => 'Macro_Schema',
      'tests' => [
        {
          # macro that does not change the schema
          'num' => 1,
          'pig' => q\define test(in) returns a {
                         $a = filter $in by age > 30;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
                     b = test(a);
                     describe b;\,
          'rc' => 0,
          'expected_out_regex'=> 'b: \{name: chararray,age: int,gpa: double\}'
        },
        {
          # macro that does change the schema (projects a single column)
          'num' => 2,
          'pig' => q\define test(in) returns a {
                         $a = foreach $in generate name;
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
                     b = test(a);
                     describe b;\,
          'rc' => 0,
          'expected_out_regex'=> 'b: \{name: chararray\}'
        }
      ]
    },
    {
      # Group: macros combined with other script features (comments, register,
      # streaming define, store inside a macro).
      'name' => 'Macro_Misc',
      'tests' => [
        {
          # A Pig comment (-- ...) inside the macro body.
          'num' => 1,
          'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
                         b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                         $c = foreach b generate age, name;
                         -- add a comment
                     }

                     a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                     x = simple_macro(a, '3.0', '40');
                     store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        },
        {
          # register: the macro body calls a UDF from testudf.jar, and the
          # 'register' statement appears after the macro definition.
          'num' => 2,
          'pig' => q\define test (in) returns b {
                          $b = foreach $in generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
                      }

                      register :FUNCPATH:/testudf.jar;
                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = test(a);
                      store x into ':OUTPATH:';\,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   register :FUNCPATH:/testudf.jar;
                                   b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
                                   store b into ':OUTPATH:';#,
        },
        {
          # 'define' for a streaming command combined with 'define' for a macro.
          # The command just prints each input line, so the verify script stores
          # the loaded relation unchanged.
          # 'pig_win' is the same script with the perl one-liner wrapped in
          # double quotes instead of single quotes -- presumably the variant run
          # on Windows; confirm against the driver.
          'num' => 3,
          # The leading comma on the next line only adds an empty list element,
          # which Perl permits; the hash contents are unaffected.
          ,'pig' => q#define CMD `perl -ne 'print $_;'`;
                      define test(in) returns B {
                          $B = stream $in through CMD as (name, age, gpa);
                      }

                      A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = test(A);
                      store x into ':OUTPATH:';#,
          'pig_win' => q#define CMD `perl -ne "print $_;"`;
                      define test(in) returns B {
                          $B = stream $in through CMD as (name, age, gpa);
                      }

                      A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = test(A);
                      store x into ':OUTPATH:';#,
          'verify_pig_script' => q#A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   store A into ':OUTPATH:';#,
          'floatpostprocess' => 1,
          'delimiter' => '  ' 
        },
        {
                # JIRA: PIG-2681 -- the macro loads from and stores to paths
                # passed in as arguments, and 'register' follows the definition.
                # NOTE(review): this test has no verify_pig_script or expected_*
                # key; how its result is checked is not visible in this file.
                'num' => 4,
                # Leading comma below is a harmless empty list element.
               ,'pig' => q\
                    define test (in,out) returns b {
                       a = load '$in' as (name, age, gpa);
                       $b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
                       store $b into '$out';
}
                       register :FUNCPATH:/testudf.jar;
                       x = test(':INPATH:/singlefile/studenttab10k',':OUTPATH:');
\,
        }
      ]
    },
    {
      # Group: the macro is pulled in from an external file with 'import'
      # instead of being defined inline.
      'name' => 'Macro_Import',
      'tests' => [
        {
          # simple_macro is not defined in this script, so it has to come from
          # the imported macro1.pig (that file is not visible here).  The verify
          # script is the hand-written filter + projection.
          'num' => 1,
          'pig' => q#import ':SCRIPTHOMEPATH:/macro1.pig';
                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro(a, '3.0', '40');
                      store x into ':OUTPATH:';#,
          'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                                   b = filter a by gpa >= 3.0 and age <= 40;
                                   c = foreach b generate age, name;
                                   store c into ':OUTPATH:';#,
        },
      ],
    },
      {
       # Group: invalid macro definitions and invocations.  Every test carries
       # an 'expected_err_regex' describing the error message the script should
       # produce.
       # NOTE(review): the 'ignore' key (tests 1 and 5) presumably makes the
       # harness skip the test, with the value recording why -- confirm against
       # the driver.
       'name' => 'Macro_Error',
       'tests' => [
         {
           # Parameter name repeated in the macro signature (min_gpa twice).
           'num' => 1,
           'ignore' => 'https://issues.apache.org/jira/browse/PIG-2247',
           'pig' => q#define simple_macro(in_relation, min_gpa, min_gpa) returns c {
                          b = filter $in_relation by gpa >= $min_gpa;
                          $c = foreach b generate age, name;
                      }
 
                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro(a, '3.0', '40');
                      store x into ':OUTPATH:';#,
            'expected_err_regex' => "Multiple arguments min_gpa found"
         },
         {
           # Undefined parameter in macro: the body uses $max_age, which is not
           # in the parameter list.
           'num' => 2,
           'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
                          b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
                          $c = foreach b generate age, name;
                      }

                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro(a, '3.0');
                      store x into ':OUTPATH:';#,
            'expected_err_regex' => "Macro inline failed for macro 'simple_macro'. Reason: Undefined parameter : max_age"
          },
          {
            # Name collision between an argument and the return value (both c).
            'num' => 3,
            'pig' => q#define simple_macro(in_relation, min_gpa, c) returns c {
                           b = filter $in_relation by gpa >= $min_gpa and age <= $c;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = simple_macro(a, '3.0', '40');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "Multiple values found for c"
          },
          {
            # Keyword (foreach) used as the macro name.  The script goes on to
            # call simple_macro, which is never defined here; the expected error
            # is the one about the 'foreach' token.
            'num' => 4,
            'pig' => q#define foreach(in_relation, min_gpa) returns c {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = simple_macro(a, '3.0', '40');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "mismatched input 'foreach' expecting IDENTIFIER"
          },
          {
            # UDF name (COUNT) used as the macro name.
            'num' => 5,
            'ignore' => 'https://issues.apache.org/jira/browse/PIG-2248',
            'pig' => q#define COUNT(in_relation, min_gpa) returns c {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = COUNT(a, '3.0');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "macro name hides UDF COUNT"
          },
          {
            # Redefine a macro: simple_macro is defined twice with different
            # signatures.
            'num' => 6,
            'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       define simple_macro(in, min_age) returns d {
                           b = filter $in by age >= $min_age;
                           $d = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = simple_macro(a, '3.0', '40');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "Duplicated macro name 'simple_macro'"
          },
          {
            # Invoke a macro that is never defined.
            'num' => 7,
            'pig' => q#
                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = nosuch_macro('a', '3.0', '40');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "Cannot expand macro 'nosuch_macro'. Reason: Macro must be defined before expansion."
          },
          {
            # Declares two return aliases (c, d) but the body only assigns $c.
            'num' => 8,
            'pig' => q#define simple(in_relation, min_gpa) returns c,d {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x, y = simple(a, '3.0');
                       store x into ':OUTPATH:.1';
                       store y into ':OUTPATH:.2';#,
            'expected_err_regex' => "Invalid macro definition: . Reason: Macro 'simple' missing return alias: d"
          },
          {
            # Syntax error in a macro ('fiter' is misspelled on purpose, on the
            # second line of the script); checks that the reported line number
            # is correct.
            'num' => 9,
            'pig' => q#define simple(in_relation, min_gpa) returns c {
                           b = fiter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = simple(a, '3.0');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "line 2"
          },
          {
           # Too many args passed to macro: two parameters declared, three
           # arguments supplied.
           'num' => 10,
           'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
                          b = filter $in_relation by gpa >= $min_gpa;
                          $c = foreach b generate age, name;
                      }

                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro(a, '3.0', '40');
                      store x into ':OUTPATH:';#,
            'expected_err_regex' => "Failed to expand macro 'simple_macro'. Reason: Expected number of parameters: 2 actual number of inputs: 3"
          },
          {
            # Macro returns two values, but the script assigns the call to a
            # single alias.
            'num' => 11,
            'pig' => q#define simple(in_relation, min_gpa) returns c,d {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                           $d = foreach b generate name, age;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x = simple(a, '3.0');
                       store x into ':OUTPATH:';#,
            'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 2 actual number of return values: 1"
          },
          {
            # Macro returns one value, but the script assigns the call to two
            # aliases.
            'num' => 12,
            'pig' => q#define simple(in_relation, min_gpa) returns c {
                           b = filter $in_relation by gpa >= $min_gpa;
                           $c = foreach b generate age, name;
                       }

                       a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                       x, y = simple(a, '3.0');
                       store x into ':OUTPATH:.1';
                       store y into ':OUTPATH:.2';#,
            'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 1 actual number of return values: 2"
          }
        ]
      },
      {
        # Group: failures triggered by the 'import' statement itself.  Each test
        # names the error text it expects in 'expected_err_regex'.
        'name' => 'Macro_Import_Error',
        'tests' => [
          {
            # Import of a file that does not exist.
            'num' => 1,
            # different error message for different version of hadoop
            'ignore' => 1,
            'pig' => q#import 'nosuchfile';
 
                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro('a', '3.0', '40');
                      store x into ':OUTPATH:';#,
            'expected_err_regex' => "Failed to import file 'nosuchfile'. Reason: Can't find the Specified file nosuchfile",
          },
          {
            # Import of a macro file containing a syntax error (macro_bad1.pig;
            # that file is not visible here).
            'num' => 2,
            'pig' => q#import ':SCRIPTHOMEPATH:/macro_bad1.pig';
                      a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
                      x = simple_macro(a, '3.0', '40');
                      store x into ':OUTPATH:';#,
            'expected_err_regex' => "Invalid macro definition",
          },
        ],
      },
    # end of the 'groups' list
    ],
  # End of the $cfg hash.  The comma after the closing brace is a harmless
  # trailing list comma ahead of the statement-terminating semicolon.
  },
;

# Note: importing a non-existent file and importing a script with an error are
# covered by the Macro_Import_Error group above.



