#!/usr/bin/env perl
############################################################################           
#  Licensed to the Apache Software Foundation (ASF) under one or more                  
#  contributor license agreements.  See the NOTICE file distributed with               
#  this work for additional information regarding copyright ownership.                 
#  The ASF licenses this file to You under the Apache License, Version 2.0             
#  (the "License"); you may not use this file except in compliance with                
#  the License.  You may obtain a copy of the License at                               
#                                                                                      
#      http://www.apache.org/licenses/LICENSE-2.0                                      
#                                                                                      
#  Unless required by applicable law or agreed to in writing, software                 
#  distributed under the License is distributed on an "AS IS" BASIS,                   
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
#  See the License for the specific language governing permissions and                 
#  limitations under the License.                                                      
                                                                                       
###############################################################################
# Test configuration for the 'Pig' driver, expressed as one hash reference
# assigned to $cfg.  Layout as visible in this file:
#   - a few top-level settings ('driver', 'nummachines', 'verify_with_pig',
#     'verify_pig_version');
#   - 'groups': a list of test groups, each with a 'name' and a list of 'tests';
#   - each test has a number ('num'), the Pig script under test ('pig') and a
#     reference script ('verify_pig_script') that computes the same result
#     directly from the input files, without going through OrcStorage.
# The scripts are q\...\ literals: single-quoted strings delimited by
# backslashes, so their content is taken verbatim with no interpolation.
# NOTE(review): $cfg is not declared in this file (no 'my'); presumably the
# loader supplies or reads it -- confirm before adding a declaration.
# NOTE(review): the meaning of 'notmq', 'floatpostprocess' and 'delimiter' is
# defined by the harness, not here -- check the driver before changing them.
$cfg = {
        'driver' => 'Pig',
        'nummachines' => 5,
        'verify_with_pig' => 1,
        'verify_pig_version' => 'old',

        'groups' => [
                {
                'name' => 'Orc',
                'tests' => [
# Test 1: Load (primitive types) from PigStorage and store into OrcStorage.
# Also tests multiple load/store statements in the same script.
                        {
                        'num' => 1,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
store a into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by age < 30;
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
b = filter a by age < 30;
store b into ':OUTPATH:';\,
                        },
# Test 2: Load (complex types: map, tuple, bag) from PigStorage and store into
# OrcStorage.  Restricted to mapred and tez; Test 6 is the Spark counterpart.
                        {
                        'num' => 2,
                        'notmq' => 1,
                        'execonly' => 'mapred,tez',
                        'pig' => q\
a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)});
store a into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
store b into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)});
store a into ':OUTPATH:';\,
                        },
# Test 3: Aggregation test using two OrcStorage datasets (one simple, one
# complex) that are joined, grouped, counted and ordered.
# Also incorporates handling of bytearrays.
                        {
                        'num' => 3,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
store a into ':OUTPATH:.simple.intermediate' using OrcStorage();
exec
b = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)}, nameagegpamap_name:chararray, nameagegpamap_age:int, nameagegpamap_gpa:float);
store b into ':OUTPATH:.complex.intermediate' using OrcStorage();
exec
c = load ':OUTPATH:.simple.intermediate' using OrcStorage();
d = load ':OUTPATH:.complex.intermediate' using OrcStorage();
e = foreach c generate name, age, gpa;
f = foreach d generate nameagegpamap#'name' as name, nameagegpamap#'age' as age, nameagegpamap#'gpa' as gpa, nameagegpatuple.tage as tage, FLATTEN(nameagegpabag) as (bname, bage, bgpa);
g = join e by name, f by name;
h = group g by (f::bgpa);
j = foreach h generate group, COUNT(g) as students;
k = order j by group, students;
store k into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
b = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)}, nameagegpamap_name:chararray, nameagegpamap_age:int, nameagegpamap_gpa:float);
c = foreach a generate name, age, gpa;
d = foreach b generate nameagegpamap#'name' as name, nameagegpamap#'age' as age, nameagegpamap#'gpa' as gpa, nameagegpatuple.tage as tage, FLATTEN(nameagegpabag) as (bname, bage, bgpa);
e = join c by name, d by name;
f = group e by (d::bgpa);
g = foreach f generate group, COUNT(e) as students;
h = order g by group, students;
store h into ':OUTPATH:';\,
                        },
# Test 4: Test various properties passed to OrcStorage through its option
# string; the data must still round-trip unchanged.
# NOTE(review): see the OrcStorage documentation for what each flag means.
                        {
                        'num' => 4,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
store a into ':OUTPATH:.orc_params.intermediate' using OrcStorage('-c ZLIB -s 67108864 -r 100000 -b 1048576 -p true -v 0.12');
exec
b = load ':OUTPATH:.orc_params.intermediate' using OrcStorage();
store b into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
store a into ':OUTPATH:';\,
                        },
# Test 5: Test loading a null map key.  The input contains null names, so
# TOMAP(name, age) is built with a null key; the verify script expects an
# empty map for those rows.
                        {
                        'num' => 5,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age);
b = foreach a generate TOMAP(name, age) as m;
store b into ':OUTPATH:.intermediate' using OrcStorage();
exec
c = load ':OUTPATH:.intermediate' using OrcStorage();
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age);
b = foreach a generate (name is null ? [] : TOMAP(name, age));
store b into ':OUTPATH:';\,
                        },
# Test 6: Runs for Spark only, as a replacement for Test 2.  Spark and MR may
# produce a different order of entries in Pig maps; that is fine in itself but
# triggers a false failure during comparison, so the map is read key by key
# here instead of being stored whole.
                        {
                          'num' => 6,
                          'notmq' => 1,
                          'execonly' => 'spark',
                          'pig' => q\
a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)});
store a into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = foreach b generate nameagegpamap#'name', nameagegpamap#'age', nameagegpamap#'gpa', nameagegpatuple, nameagegpabag;
store c into ':OUTPATH:';\,
                          'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)});
b = foreach a generate nameagegpamap#'name', nameagegpamap#'age', nameagegpamap#'gpa', nameagegpatuple, nameagegpabag;
store b into ':OUTPATH:';\,
                        }
                        ]
                },
                {
                'name' => 'Orc_Pushdown',
                'tests' => [
# Every test in this group orders the input on one column, stores it with
# OrcStorage, reloads it and filters on that same column; the verify script
# applies the same filter to the raw input.
# NOTE(review): presumably this exercises filter (predicate) pushdown into the
# ORC loader, as the group name suggests -- confirm against the loader.
#
# Test 1: Filter on a chararray column (name).
                        {
                        'num' => 1,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
b = order a by name parallel 4;
store b into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by name < 'david falkner';
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
b = filter a by name < 'david falkner';
store b into ':OUTPATH:';\,
                        },
# Test 2: Filter on an int column (age) over the larger studenttab20m input,
# sorted descending and stored with an explicit '-s 10000000' option.
                        {
                        'num' => 2,
                        'notmq' => 1,
                        'execonly' => 'mapred,tez,spark', # studenttab20m not available in local mode
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:float);
b = order a by age desc parallel 4;
store b into ':OUTPATH:.intermediate' using OrcStorage('-s 10000000');
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by age <= 22;
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:float);
b = filter a by age <= 22;
store b into ':OUTPATH:';\,
                        },
# Test 3: Compound filter: a range on a float column (gpa) combined with an
# int comparison whose right-hand side is an arithmetic expression (30 + 2).
                        {
                        'num' => 3,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
b = order a by gpa parallel 4;
store b into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by gpa >= 3.2 and gpa < 3.5 and age > 30 + 2;
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
b = filter a by gpa >= 3.2 and gpa < 3.5 and age > 30 + 2;
store b into ':OUTPATH:';\,
                        },
# Test 4: Filter on a bigdecimal column (gpa loaded as bigdecimal).
# The 'delimiter' value below is a literal tab character.
                        {
                        'num' => 4,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal);
b = order a by gpa parallel 4;
store b into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by gpa >= 3.5;
store c into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal);
b = filter a by gpa >= 3.5;
store b into ':OUTPATH:';\,
                        'floatpostprocess' => 1,
                        'delimiter' => '	',
                        },
# Test 5: Filter on a datetime column.  Column d is 2010-01-01 when age > 35
# and 2010-01-05 otherwise, so "d >= 2010-01-03" keeps exactly the rows the
# verify script selects with "age<=35".  d is dropped again before storing.
# NOTE(review): the script under test loads gpa as double while the verify
# script loads it as bigdecimal -- confirm this mismatch is intentional.
# The 'delimiter' value below is a literal tab character.
                        {
                        'num' => 5,
                        'notmq' => 1,
                        'pig' => q\
a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate name, age, gpa, (age>35 ? ToDate('20100101', 'yyyyMMdd', 'UTC') : ToDate('20100105', 'yyyyMMdd', 'UTC')) as d;
c = order b by d parallel 4;
store c into ':OUTPATH:.intermediate' using OrcStorage();
exec
b = load ':OUTPATH:.intermediate' using OrcStorage();
c = filter b by d >= ToDate('20100103', 'yyyyMMdd', 'UTC');
d = foreach c generate name, age, gpa;
store d into ':OUTPATH:';\,
                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal);
b = filter a by age<=35;
store b into ':OUTPATH:';\,
                        'floatpostprocess' => 1,
                        'delimiter' => '	',
                        },
                        ]
                },

        ]
};

