| #!/usr/bin/env perl |
| |
| ############################################################################ |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| ############################################################################### |
| # Tests for Jython embedding |
| # |
| # |
| |
# Status messages. The embedded Jython scripts in this configuration print or raise these same literal texts.
# NOTE(review): neither variable is referenced in the visible part of this file; presumably the harness reads them - confirm.
| our $PASSED="Pig job PASSED"; |
| our $FAILED="Pig job FAILED"; |
| |
| |
# Test configuration: a driver name plus a list of test groups; each group has a name and a list of test cases.
# In each test case the pig entry holds the text of an embedded Jython script (it starts with a python shebang line),
# and verify_pig_script, where present, holds a plain Pig Latin script
# (presumably run by the harness to produce the expected output - TODO confirm).
| $cfg = { |
| |
| 'driver' => 'Pig', |
| ,'groups' => [ |
# Group Jython_Checkin.
| { |
| 'name' => 'Jython_Checkin', |
| 'tests' => [ |
| { |
# Compiles a filter/cogroup/group/order pipeline with Pig.compile, binds in1, in2 and out, and runs it with runSingle.
# The verify script is the same pipeline with the input and output paths written in directly.
| 'num' => 2, |
| 'floatpostprocess' => 1, |
| 'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input1=':INPATH:/singlefile/studenttab10k' |
| input2=':INPATH:/singlefile/votertab10k' |
| output=':OUTPATH:' |
| |
| P = Pig.compile(""" |
| a = load '$in1' as (name, age, gpa); |
| b = load '$in2' as (name, age, registration, contributions); |
| c = filter a by age < 50; |
| d = filter b by age < 50; |
| e = cogroup c by (name, age), d by (name, age) ; |
| f = foreach e generate flatten(c), flatten(d); |
| g = group f by registration; |
| h = foreach g generate group, (float) ROUND(SUM(f.d::contributions) * 100) / 100.0; |
| i = order h by $1; |
| store i into '$out'; |
| """).bind({'in1':input1,'in2':input2, 'out':output}).runSingle() |
| if P.isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| |
| \, |
| 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); |
| c = filter a by age < 50; |
| d = filter b by age < 50; |
| e = cogroup c by (name, age), d by (name, age) ; |
| f = foreach e generate flatten(c), flatten(d); |
| g = group f by registration; |
| h = foreach g generate group, (float) ROUND(SUM(f.d::contributions) * 100) / 100.0; |
| i = order h by $1; |
| store i into ':OUTPATH:'; |
| \, |
| 'sortArgs' => ['-t', ' ', '-k', '2,2'], |
| }, { |
| 'num' => 3, |
| #jython uses 'python.home'/cachedir when python.cachedir is not specified. |
| #To test python.cachedir is set correctly by the framework, |
| #setting python.home to a random path |
| 'java_params' => ['-Dpython.home=/dev/null/fake'], |
| 'pig' => q\#!/usr/bin/python |
| import sys |
| |
| from org.apache.hadoop.conf import * |
| from org.apache.hadoop.fs import * |
| |
| config = Configuration() |
| hdfs = FileSystem.get(config) |
| \, |
| ,'rc' => 0 |
| } |
| ] |
| |
| }, { |
| |
# Group Jython_CompileBindRun: compile, compileFromFile, bind and run variants of embedded scripts.
| 'name' => 'Jython_CompileBindRun' |
| ,'tests' => [ |
| { |
| # bind no parameters, runSingle |
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| # JYTHON COMMENT |
| from org.apache.pig.scripting import Pig |
| |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:';""") |
| |
| Q = P.bind() |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:';\, |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| |
| },{ |
| # bind single input parameter |
# NOTE(review): the script defines output but never binds it; the store target is written as :OUTPATH: directly.
| 'num' => 2 |
| ,'pig' => q\#!/usr/bin/python |
| |
| from org.apache.pig.scripting import Pig |
| |
| input= ":INPATH:/singlefile/studenttab10k" |
| output = ":OUTPATH:" |
| |
| P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into ':OUTPATH:';""") |
| |
| Q = P.bind({'in':input}) |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:';\, |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| |
| # ,'expected_out_regex' => "Pig job PASSED" |
| },{ |
| # bind parallel execution with a multiple entries |
# bind receives a list of three parameter maps (same input, different max and out); run() returns one result per map.
| 'num' => 3 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input= ":INPATH:/singlefile/studenttab10k" |
| max1="50"; |
| output1= ":OUTPATH:.1" |
| max2="40.0"; |
| output2= ":OUTPATH:.2" |
| max3="30.0f"; |
| output3= ":OUTPATH:.3" |
| |
| P = Pig.compile(""" |
| A = load '$in' as (name, age, gpa); |
| B= filter A by age < $max; |
| C = foreach B generate name; |
| store C into '$out'; |
| -- store C into put here as comment to fake multi-query |
| """) |
| |
| Q = P.bind([ |
| {'in':input ,'max':max1 ,'out':output1 } |
| ,{'in':input ,'max':max2 ,'out':output2 } |
| ,{'in':input ,'max':max3 ,'out':output3 } |
| ]) |
| |
| results = Q.run() |
| for i in [0, 1, 2]: |
| result = results[i] |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= filter A by age < 50; |
| C = foreach B generate name; |
| store C into ':OUTPATH:.1'; |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= filter A by age < 40.0; |
| C = foreach B generate name; |
| store C into ':OUTPATH:.2'; |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= filter A by age < 30.0f; |
| C = foreach B generate name; |
| store C into ':OUTPATH:.3'; |
| \, |
| |
| },{ |
| # compile pig script file with no parameters |
# The script first writes the Pig Latin text to :TMP:/script.pig, then compiles that file with compileFromFile.
| 'num' => 4 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| #create pig script |
| pig_script = ":TMP:/script.pig" |
| pigfile = open( pig_script, 'w+') |
| pigfile.write(""" |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| -- a comment |
| store A into ':OUTPATH:'; |
| """) |
| pigfile.close() |
| |
| #execute pig script |
| |
| result = Pig.compileFromFile( pig_script ).bind().runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:'; |
| \ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| },{ |
| # compile pig script file with parameters |
# NOTE(review): the embedded script repeats its import line and its create-pig-script comment; this looks like harmless duplication.
| 'num' => 5 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| #create pig script |
| |
| from org.apache.pig.scripting import Pig |
| |
| #create pig script |
| |
| pig_script = ":TMP:/script.pig" |
| pigfile = open( pig_script, 'w+') |
| pigfile.write(""" |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into '$out'; |
| """) |
| pigfile.close() |
| |
| #execute pig script |
| output= ":OUTPATH:" |
| |
| result = Pig.compileFromFile(pig_script).bind({'out':output}).runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:'; |
| \ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| |
| },{ |
| # results.getResults(alias) for null results |
# The filter (age > 9999) is meant to select nothing; the script does not inspect the result object afterwards.
| 'num' => 6 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| EMPTY= filter A by age > 9999; |
| store EMPTY into ':OUTPATH:'; |
| """) |
| |
| result = P.bind().runSingle() |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| EMPTY= filter A by age > 9999; |
| store EMPTY into ':OUTPATH:';\ |
| }, |
| { |
| # bind parameters from python context |
# bind() takes no arguments here; the placeholders inpath, max and out share their names with Python variables set in the script.
| 'num' => 7 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load '$inpath' as (name, age, gpa); |
| B= filter A by age < $max; |
| store B into '$out'; |
| """) |
| |
| #execute pig script |
| inpath= ":INPATH:/singlefile/studenttab10k" |
| out= ":OUTPATH:" |
| max = 40 |
| |
| result = P.bind().runSingle() |
| \, |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= filter A by age < 40; |
| store B into ':OUTPATH:';\ |
| |
| },{ |
| # bind multiple times |
# The verify script expects three outputs (suffixes .1, .2 and .3), one per value of i in the script loop.
| 'num' => 8 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load '$inpath' as (name, age, gpa); |
| B= foreach A generate age + $i; |
| store B into '$out'; |
| -- extra store B into to force multiquery |
| """) |
| |
| #execute pig script |
| inpath= ":INPATH:/singlefile/studenttab10k" |
| for i in [1,2,3]: |
| out= ":OUTPATH:." + str(i) |
| result = P.bind().runSingle() |
| \, |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= foreach A generate age + 1; |
| store B into ':OUTPATH:.1'; |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= foreach A generate age + 2; |
| store B into ':OUTPATH:.2'; |
| A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= foreach A generate age + 3; |
| store B into ':OUTPATH:.3';\, |
| |
| },{ |
| # python script with parameters |
# pig_params supplies loadfile, which the script references as $loadfile inside the load path.
| 'num' => 11 |
| ,'pig_params' => ['-p', qq(loadfile='studenttab10k')], |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/$loadfile' as (name, age, gpa); |
| store A into ':OUTPATH:';""") |
| |
| Q = P.bind() |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:'; |
| \ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| },{ |
| # python script with parameter file |
# The script references $fname; presumably the params_3 file named in pig_params defines it - TODO confirm.
| 'num' => 12 |
| ,'pig_params' => ['-m', ":PARAMPATH:/params_3"], |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/$fname' as (name, age, gpa); |
| store A into ':OUTPATH:';""") |
| |
| Q = P.bind() |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:'; |
| \ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| },{ |
| # python script with command line arguments |
# The script builds its load path from sys.argv[1]; additional_cmd_args lists studenttab10k as the one extra argument.
| 'num' => 13 |
| ,'additional_cmd_args' => ['studenttab10k'] |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| import sys |
| |
| P = Pig.compile("A = load ':INPATH:/singlefile/" + sys.argv[1] + "' as (name, age, gpa);" + |
| "store A into ':OUTPATH:';"); |
| |
| Q = P.bind() |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \, |
| |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:'; |
| \ |
| ,'floatpostprocess' => 1 |
| ,'delimiter' => ' ' |
| } |
| ] |
| },{ |
# Group Jython_Diagnostics: explain, illustrate and describe calls on a bound script.
# All six tests compile the same two-store script and bind in1, in2, max, out1 and out2; only the final call differs.
| 'name' => 'Jython_Diagnostics' |
| ,'tests' => [ |
| { |
| # explain() on a complex query |
# Only rc 0 is specified for this test; no output regex is given.
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain() |
| \ |
| |
| ,'rc'=> 0 |
| |
| }, { |
| # illustrate() on a complex query |
| 'num' => 2 |
| ,'execonly' => 'mapred,local' #TODO: PIG-3993,PIG-5204: Illustrate is yet to be implemented in Tez and in Spark |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate(); |
| \ |
| ,'rc'=> 0 |
| ,'expected_out_regex' => "A.*name:bytearray.*age:bytearray.*gpa:bytearray" |
| }, { |
| # describe() on an alias |
| 'num' => 3 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('A'); |
| \ |
| ,'rc'=> 0 |
| ,'expected_out_regex' => "A:.*{name:.*bytearray,age:.*bytearray,gpa:.*bytearray}" |
| }, { |
| # describe() on an undefined alias |
| 'num' => 4 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('INVALID_ALIAS'); |
| \ |
| ,'rc'=> 6 |
| ,'expected_err_regex' => "ERROR 1003: Unable to find an operator for alias INVALID_ALIAS" |
| |
| |
| }, { |
| # illustrate(alias) |
# Unlike the no-argument illustrate() test, this one specifies rc 6 and ERROR 1121.
| 'num' => 5 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate('D'); |
| \ |
| ,'rc'=> 6 |
| ,'expected_err_regex' => "ERROR 1121" |
| |
| }, { |
| # explain(alias) |
# Unlike the no-argument explain() test, this one specifies rc 6 and ERROR 1121.
| 'num' => 6 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1 = ":OUTPATH:.1" |
| output2 = ":OUTPATH:.2" |
| maximum="20"; |
| |
| |
| #No Schema specified |
| P = Pig.compile(""" |
| A = load '$in1' as (name, age, gpa); |
| B= filter A by age < $max; |
| C= foreach B generate name,age; |
| store C into '$out1'; |
| D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| E= filter D by age < $max; |
| F= foreach E generate name,age; |
| store F into '$out2'; |
| """) |
| |
| result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain('C') |
| \ |
| ,'rc'=> 6 |
| ,'expected_err_regex' => "ERROR 1121" |
| # ,'expected_err_regex' => "TypeError: explain(): expected 0 args; got 1" |
| |
| } |
| ] |
| }, { |
# Group Iterator: after a successful run the script iterates over the tuples of alias A, writes the first three
# fields of each tuple as a tab-separated line to a local file, and copies that file into :OUTPATH: as part-m-00000.
| 'name' => 'Iterator' |
| ,'tests' => [ |
| { |
| 'num' => 1 |
# This script uses @ as the q delimiter instead of the backslash used by the other tests,
# because the script text itself contains backslash escapes.
| ,'pig' => q@#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k'; |
| store A into ':OUTPATH:.tmp'; |
| """) |
| |
| #execute pig script |
| result = P.bind().runSingle() |
| |
| if not result.isSuccessful(): |
| raise "Pig job FAILED" |
| |
| pigfile = open(":TMP:/iterator_output.txt", 'wb+') |
| iter = result.result("A").iterator() |
| while iter.hasNext(): |
| t = iter.next() |
| name = t.get(0) |
| age = t.get(1) |
| gpa = t.get(2) |
| pigfile.write(str(name) + "\\t" + str(age) + "\\t" + str(gpa) + "\\n") |
| pigfile.close() |
| Pig.fs("-mkdir :OUTPATH:") |
| Pig.fs("-copyFromLocal :TMP:/iterator_output.txt :OUTPATH:/part-m-00000") |
| @, |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k'; |
| store A into ':OUTPATH:';\, |
| }, |
| ] |
| }, { |
# Group Jython_Misc: miscellaneous calls on the Pig scripting class.
| 'name' => 'Jython_Misc' |
| ,'tests' => [ |
| { |
| # fs commands: lists a file |
# The only expectation given is that the output matches the listed file name (expected_out_regex).
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| # JYTHON COMMENT |
| from org.apache.pig.scripting import Pig |
| |
| Pig.fs( "-ls :INPATH:/singlefile/studenttab10k"); |
| \ |
| |
| ,'expected_out_regex' => "studenttab10k" |
| |
| } |
| ] |
| }, { |
| |
# Group Jython_Macro: the script writes a macro definition (group_and_count) to :TMP:/module.pig,
# then compiles a script that imports that file and applies the macro.
# The verify script spells out the same group-and-count steps without the macro.
| 'name' => 'Jython_Macro' |
| ,'tests' => [ |
| { |
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| #create pig script |
| |
| # mymodule.pig |
| pig_script = ":TMP:/module.pig" |
| pigfile = open( pig_script, 'w+') |
| pigfile.write(""" |
| define group_and_count (A, group_key) returns B { |
| D = group $A by $group_key; |
| $B = foreach D generate group, COUNT($A); |
| }; |
| """) |
| pigfile.close() |
| |
| |
| #main.pig |
| |
| P = Pig.compile("""import ':TMP:/module.pig'; |
| alpha = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa); |
| gamma = group_and_count (alpha, user); |
| store gamma into ':OUTPATH:'; |
| """) |
| P.bind().runSingle() |
| \, |
| 'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa); |
| B = group A by user; |
| C = foreach B generate group, COUNT(A); |
| store C into ':OUTPATH:';\ |
| |
| } |
| ] |
| |
| }, { |
| # SET debug 'on' |
| # SET job.name 'my job' |
| # SET default_parallel 100 |
| |
# Group Jython_Properties: properties handed to runSingle.
| 'name' => 'Jython_Properties', |
| 'tests' => [ |
| { |
| # check if property is passed to Pig |
# The script sets pig.default.load.func to a value that cannot be resolved and passes it to runSingle,
# so the test specifies rc 6 and ERROR 1070 rather than a successful run.
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| # JYTHON COMMENT |
| from org.apache.pig.scripting import Pig |
| from java.util import Properties; |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:';""") |
| |
| Q = P.bind() |
| |
| prop = Properties() |
| prop.put("pig.default.load.func", "wrong") |
| result = Q.runSingle(prop) |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| else: |
| raise "Pig job FAILED" |
| \ |
| ,'rc'=> 6 |
| ,'expected_err_regex' => "ERROR 1070: Could not resolve wrong using imports" |
| |
| } |
| ] |
| }, |
| { |
# Group Jython_Error: scripts that are meant to fail; every test here specifies rc 6 and ERROR 1121.
| 'name' => 'Jython_Error', |
| 'tests' => [ |
| { |
| # run a script that returns single negative result |
# The bind call references bad_input, which the script never defines (only input and output are set);
# presumably that undefined name is the source of the expected error - TODO confirm.
| 'num' => 1 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| input= ":INPATH:/singlefile/studenttab10k" |
| output = ":OUTPATH:" |
| |
| P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into '$out';""") |
| |
| Q = P.bind({'in':bad_input, 'out':output}) |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \ |
| |
| ,'rc' => 6 |
| ,'expected_err_regex' => "ERROR 1121" |
| },{ |
| # compileFromFile for pig script file that does not exist throws IOException |
# NOTE(review): the script calls os.remove on the script path before compileFromFile; if the file is already
# absent, os.remove itself would presumably raise first - confirm which failure the regex is meant to catch.
| 'num' => 4 |
| ,'pig' => q\#!/usr/bin/python |
| import os |
| from org.apache.pig.scripting import Pig |
| |
| # intentionally don't create pig script |
| |
| pig_script = ":TMP:/script.pig" |
| |
| os.remove(pig_script) |
| |
| #execute pig script |
| input1= ":INPATH:/singlefile/studenttab10k" |
| input2= ":INPATH:/singlefile/votertab10k" |
| output1= ":OUTPATH:.1" |
| output2= ":OUTPATH:.2" |
| |
| results = Pig.compileFromFile(pig_script).bind({'in1':input1,'in2':input2, 'out1':output1, 'out2':output2 }).run() |
| |
| if results[0].isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \ |
| |
| ,'expected_err_regex' => "ERROR 1121" |
| ,'rc'=> 6 |
| },{ |
| # Pig.compile called without importing the Pig class (the import line in the script is commented out) |
| 'num' => 6 |
| ,'pig' => q\#!/usr/bin/python |
| # JYTHON COMMENT |
| #from org.apache.pig.scripting import Pig |
| |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| store A into ':OUTPATH:';""") |
| |
| Q = P.bind() |
| |
| result = Q.runSingle() |
| |
| if result.isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \ |
| |
| ,'rc' => 6 |
| ,'expected_err_regex' => "ERROR 1121" |
| }, |
| { |
| # iter.next for an alias that is undefined |
# The script asks the first result for alias E, while the compiled script only defines A, B, C and D.
| 'num' => 7 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| #create pig script |
| |
| out1= ":OUTPATH:.1" |
| out2= ":OUTPATH:.2" |
| |
| P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); |
| B= filter A by age < 50; |
| store B into '$out1'; |
| C = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double); |
| D = filter C by name matches '^fred*'; |
| store D into '$out2'; |
| """) |
| |
| results = P.bind().run() |
| iter = results[0].result("E").iterator() |
| |
| if results[0].isSuccessful(): |
| print "Pig job PASSED" |
| |
| else: |
| raise "Pig job FAILED" |
| \ |
| ,'rc' => 6 |
| ,'expected_err_regex' => "ERROR 1121" |
| }, |
| ] |
| }, |
| { |
# Group Jython_Command: scripts whose compiled text is a sh command rather than Pig Latin statements.
| 'name' => 'Jython_Command', |
| 'tests' => [ |
| { |
| # sh command that succeeds: the echoed text must be followed by the success message in the output |
| 'num' => 2 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| Q = Pig.compile("sh echo mymessage") |
| result = Q.bind().runSingle() |
| |
| if result.isSuccessful() : |
| print 'Pig job succeeded' |
| else : |
| raise 'Cant run sh command' |
| \ |
| ,'rc' => 0 |
| ,'expected_out_regex' => "mymessage\nPig job succeeded" |
| }, |
| { |
| # sh command that fails (cat of a nonexistent file): rc 6 is specified |
| 'num' => 3 |
| ,'pig' => q\#!/usr/bin/python |
| from org.apache.pig.scripting import Pig |
| |
| Q = Pig.compile("sh cat nonexistfile") |
| result = Q.bind().runSingle() |
| |
| if result.isSuccessful() : |
| print 'Pig job succeeded' |
| else : |
| raise 'Cant run sh command' |
| \ |
| ,'rc' => 6 |
| } |
| ] |
| } |
| ] |
| } |
| ; |
| |
| |
| |
| |