blob: 7bfb7bb6d57ced658370b5e319ff20e6202202f2 [file] [log] [blame]
#!/usr/bin/env perl
############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
# Tests for Jython embedding
#
#
# Marker strings matching the text the embedded Jython scripts below print on
# success and raise on failure. They are not referenced again in this file;
# presumably the test harness reads them -- TODO confirm.
our $PASSED="Pig job PASSED";
our $FAILED="Pig job FAILED";
# Test configuration: the driver name plus a list of test groups. Each group is
# a hash with a 'name' and a 'tests' array, and each test carries a 'num' and a
# 'pig' script. The scripts are quoted with q\...\ (or q@...@), so Perl does
# not interpolate anything inside them.
# NOTE(review): :INPATH:, :OUTPATH:, :TMP: etc. look like placeholders that the
# harness substitutes before a script is run -- confirm.
$cfg = {
'driver' => 'Pig',
# The leading comma below leaves an empty list element, which Perl ignores.
,'groups' => [
{
# Check-in tests for Jython embedding. Numbering starts at 2; this group has
# no test 1.
'name' => 'Jython_Checkin',
'tests' => [
{
# Bind two input paths and one output path through a dict, then run a
# filter/cogroup/flatten/group/SUM/order pipeline once with runSingle().
# 'verify_pig_script' is the same pipeline with the paths written inline.
'num' => 2,
'floatpostprocess' => 1,
'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1=':INPATH:/singlefile/studenttab10k'
input2=':INPATH:/singlefile/votertab10k'
output=':OUTPATH:'
P = Pig.compile("""
a = load '$in1' as (name, age, gpa);
b = load '$in2' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age) ;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1;
store i into '$out';
""").bind({'in1':input1,'in2':input2, 'out':output}).runSingle()
if P.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age) ;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1;
store i into ':OUTPATH:';
\,
'sortArgs' => ['-t', ' ', '-k', '2,2'],
}, {
'num' => 3,
# Jython uses 'python.home'/cachedir when python.cachedir is not specified.
# To test that python.cachedir is set correctly by the framework,
# python.home is pointed at a bogus path.
# The script itself only imports the Hadoop conf/fs packages and obtains a
# FileSystem; the test passes when the run exits with rc 0.
'java_params' => ['-Dpython.home=/dev/null/fake'],
'pig' => q\#!/usr/bin/python
import sys
from org.apache.hadoop.conf import *
from org.apache.hadoop.fs import *
config = Configuration()
hdfs = FileSystem.get(config)
\,
,'rc' => 0
}
]
},
{
# Minimal embedding case: compile an inline Pig script, bind() with no
# parameters, and run it once with runSingle().
'name' => 'Jython_Embedded',
'tests' => [
{
# Load the student table and store it unchanged; the embedded script also
# carries a Python comment line.
'num' => 1,
,'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
}
]
}, {
# Variations on Pig.compile()/compileFromFile(), bind() and run()/runSingle():
# explicit bind dicts, a list of bind dicts, binding from the Python context,
# and parameters supplied from the command line.
'name' => 'Jython_CompileBindRun'
,'tests' => [
{ # bind() with no parameters, runSingle
'num' => 1
,'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# 9.2 1 bind a single input parameter and no output parameter
# (only '$in' is bound; the store path is written inline in the script)
'num' => 2
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into ':OUTPATH:';""")
Q = P.bind({'in':input})
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
,'floatpostprocess' => 1
,'delimiter' => ' '
# ,'expected_out_regex' => "Pig job PASSED"
},{
# bind with a list of three parameter sets and execute them all with run();
# each set has its own 'max' threshold and output path
'num' => 3
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
max1="50";
output1= ":OUTPATH:.1"
max2="40.0";
output2= ":OUTPATH:.2"
max3="30.0f";
output3= ":OUTPATH:.3"
P = Pig.compile("""
A = load '$in' as (name, age, gpa);
B= filter A by age < $max;
C = foreach B generate name;
store C into '$out';
-- store C into put here as comment to fake multi-query
""")
Q = P.bind([
{'in':input ,'max':max1 ,'out':output1 }
,{'in':input ,'max':max2 ,'out':output2 }
,{'in':input ,'max':max3 ,'out':output3 }
])
results = Q.run()
for i in [0, 1, 2]:
result = results[i]
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 50;
C = foreach B generate name;
store C into ':OUTPATH:.1';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 40.0;
C = foreach B generate name;
store C into ':OUTPATH:.2';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 30.0f;
C = foreach B generate name;
store C into ':OUTPATH:.3';
\,
},{
# 8.6 compile a pig script file with no input and no output parameters
# 12.2 import python modules
# The Jython script first writes the Pig script to :TMP:/script.pig and then
# runs it through compileFromFile().
'num' => 4
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
""")
pigfile.close()
#execute pig script
result = Pig.compileFromFile( pig_script ).bind().runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# 8.7 compile a pig script file with no input and with an output parameter
# ('$out' is bound through a dict)
'num' => 5
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into '$out';
""")
pigfile.close()
#execute pig script
output= ":OUTPATH:"
result = Pig.compileFromFile(pig_script).bind({'out':output}).runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# 11.15 1 results.getResults(alias) for null results
# NOTE(review): the script never calls getResults(); it only stores the
# output of a filter (age > 9999) that is presumably meant to match no rows.
'num' => 6
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
EMPTY= filter A by age > 9999;
store EMPTY into ':OUTPATH:';
""")
result = P.bind().runSingle()
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
EMPTY= filter A by age > 9999;
store EMPTY into ':OUTPATH:';\
},
{
# bind reading from the python context: bind() is called with no arguments
# and $inpath, $max and $out are Python variables assigned in the script
'num' => 7
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load '$inpath' as (name, age, gpa);
B= filter A by age < $max;
store B into '$out';
""")
#execute pig script
inpath= ":INPATH:/singlefile/studenttab10k"
out= ":OUTPATH:"
max = 40
result = P.bind().runSingle()
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 40;
store B into ':OUTPATH:';\
},{
# bind multiple times: the same compiled script is bound and run once per
# loop iteration, with $i and $out changing each time
'num' => 8
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load '$inpath' as (name, age, gpa);
B= foreach A generate age + $i;
store B into '$out';
-- extra store B into to force multiquery
""")
#execute pig script
inpath= ":INPATH:/singlefile/studenttab10k"
for i in [1,2,3]:
out= ":OUTPATH:." + str(i)
result = P.bind().runSingle()
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 1;
store B into ':OUTPATH:.1';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 2;
store B into ':OUTPATH:.2';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 3;
store B into ':OUTPATH:.3';\,
},
{
# invoke .run() (rather than .runSingle()) on a script bound without a
# parameter list
'num' => 9
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
""")
result = P.bind().run()
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
},
{
# 8.6 compile a pig script file with no input and no output parameters
# 12.2 import python modules
# Same as test 4 except that the generated Pig script contains a
# '-- a comment' line.
'num' => 10
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
-- a comment
store A into ':OUTPATH:';
""")
pigfile.close()
#execute pig script
result = Pig.compileFromFile( pig_script ).bind().runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# '$loadfile' is not bound in the script; it is supplied on the command line
# through 'pig_params' (-p loadfile='studenttab10k')
'num' => 11
,'pig_params' => ['-p', qq(loadfile='studenttab10k')],
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/$loadfile' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# '$fname' is not bound in the script; 'pig_params' passes -m with
# :PARAMPATH:/params_3, presumably a parameter file that defines fname --
# TODO confirm
'num' => 12
,'pig_params' => ['-m', ":PARAMPATH:/params_3"],
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/$fname' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
},{
# the input file name is taken from sys.argv[1]; 'additional_cmd_args'
# provides 'studenttab10k' as that argument
'num' => 13
,'additional_cmd_args' => ['studenttab10k']
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
import sys
P = Pig.compile("A = load ':INPATH:/singlefile/" + sys.argv[1] + "' as (name, age, gpa);" +
"store A into ':OUTPATH:';");
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\
,'floatpostprocess' => 1
,'delimiter' => ' '
}
]
},{
# explain(), illustrate() and describe() on a bound script. All six tests
# compile the same two-pipeline Pig script and bind the same five parameters;
# only the final diagnostic call and the expected rc/regex differ.
'name' => 'Jython_Diagnostics'
,'tests' => [
{
# 11.23 1 explain() on a complex query; expects rc 0
'num' => 1
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain()
\
,'rc'=> 0
}, {
# 11.22 1 illustrate() on a complex query; expects rc 0 and output that
# shows alias A with bytearray fields
'num' => 2
,'execonly' => 'mapred,local' #TODO: PIG-3993: Illustrate is yet to be implemented in Tez
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate();
\
,'rc'=> 0
,'expected_out_regex' => "A.*name:bytearray.*age:bytearray.*gpa:bytearray"
}, {
# 11.24 1 describe() on an alias; expects rc 0 and the schema of A on stdout
'num' => 3
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('A');
\
,'rc'=> 0
,'expected_out_regex' => "A:.*{name:.*bytearray,age:.*bytearray,gpa:.*bytearray}"
}, {
# 11.29 1 describe() on an undefined alias; expects rc 6 and ERROR 1003
'num' => 4
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('INVALID_ALIAS');
\
,'rc'=> 6
,'expected_err_regex' => "ERROR 1003: Unable to find an operator for alias INVALID_ALIAS"
}, {
# 11.27 1 illustrate(alias); expected to fail with rc 6 and ERROR 1121
'num' => 5
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate('D');
\
,'rc'=> 6
,'expected_err_regex' => "ERROR 1121"
}, {
# 11.28 1 explain(alias); expected to fail with rc 6 and ERROR 1121.
# NOTE(review): the commented-out regex below suggests the underlying failure
# is a TypeError because explain() takes no arguments -- confirm.
'num' => 6
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain('C')
\
,'rc'=> 6
,'expected_err_regex' => "ERROR 1121"
# ,'expected_err_regex' => "TypeError: explain(): expected 0 args; got 1"
}
]
}, {
# Reads job results back into the Jython script through an iterator.
'name' => 'Iterator'
,'tests' => [
{
# The script stores A to ':OUTPATH:.tmp', walks result.result("A").iterator()
# writing the first three fields of each tuple to a local file, then copies
# that file to :OUTPATH:/part-m-00000 with Pig.fs().
# This script is quoted with q@...@ instead of q\...\ because it contains
# backslash escapes ("\\t", "\\n"), which could not appear inside a
# backslash-delimited string.
'num' => 1
,'pig' => q@#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k';
store A into ':OUTPATH:.tmp';
""")
#execute pig script
result = P.bind().runSingle()
if not result.isSuccessful():
raise "Pig job FAILED"
pigfile = open(":TMP:/iterator_output.txt", 'wb+')
iter = result.result("A").iterator()
while iter.hasNext():
t = iter.next()
name = t.get(0)
age = t.get(1)
gpa = t.get(2)
pigfile.write(str(name) + "\\t" + str(age) + "\\t" + str(gpa) + "\\n")
pigfile.close()
Pig.fs("-mkdir :OUTPATH:")
Pig.fs("-copyFromLocal :TMP:/iterator_output.txt :OUTPATH:/part-m-00000")
@,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k';
store A into ':OUTPATH:';\,
},
]
}, {
# 12.2 import python modules
# 12.1 python comments
# 12.6 fs lists a file
'name' => 'Jython_Misc'
,'tests' => [
{
# Pig.fs("-ls ...") on the student table; the file name must appear on stdout
'num' => 1
,'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
Pig.fs( "-ls :INPATH:/singlefile/studenttab10k");
\
,'expected_out_regex' => "studenttab10k"
}
]
}, {
# Pig macros used from an embedded script.
'name' => 'Jython_Macro'
,'tests' => [
{
# The Jython script writes a macro definition (group_and_count) to
# :TMP:/module.pig, then compiles a main script that imports that file and
# calls the macro. 'verify_pig_script' is the equivalent group/COUNT
# pipeline written without the macro.
'num' => 1
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
# mymodule.pig
pig_script = ":TMP:/module.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
define group_and_count (A, group_key) returns B {
D = group $A by $group_key;
$B = foreach D generate group, COUNT($A);
};
""")
pigfile.close()
#main.pig
P = Pig.compile("""import ':TMP:/module.pig';
alpha = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa);
gamma = group_and_count (alpha, user);
store gamma into ':OUTPATH:';
""")
P.bind().runSingle()
\,
'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa);
B = group A by user;
C = foreach B generate group, COUNT(A);
store C into ':OUTPATH:';\
}
]
}, {
# SET debug 'on'
# SET job.name 'my job'
# SET default_parallel 100
'name' => 'Jython_Properties',
'tests' => [
{
# Passes a java.util.Properties object (mapred.job.name = "friendship") to
# runSingle(). The test is flagged 'ignore' (see the note on that key), and
# unlike the other tests it carries a 'sql' key rather than a
# 'verify_pig_script'.
'num' => 1
,'ignore' => 1 # This is a good test except that we can't verify it.
,'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
from java.util import Properties;
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
prop = Properties()
prop.put("mapred.job.name", "friendship")
result = Q.runSingle(prop)
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'sql' => "select name, age, gpa+0.00 from studenttab10k;"
,'floatpostprocess' => 1
,'delimiter' => ' '
}
]
},
{
# Negative tests: every test here expects rc 6 and a specific message on
# stderr.
# NOTE(review): ERROR 1121 appears to be the code Pig reports for an error
# raised inside the embedded Python script -- confirm.
'name' => 'Jython_Error',
'tests' => [
{
# run a script that returns a single negative result: the bind dict uses
# the Python name bad_input, which the script never defines
'num' => 1
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into '$out';""")
Q = P.bind({'in':bad_input, 'out':output})
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'rc' => 6
,'expected_err_regex' => "ERROR 1121"
},
{
# same as test 1, but the undefined Python name is the output path
# (bad_output) and the regex checks the Python "is not defined" message
'num' => 2
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into '$out';""")
Q = P.bind({'in':input, 'out':bad_output})
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'rc' => 6
,'expected_err_regex' => "name 'bad_output' is not defined"
},{
# bind an undefined input parameter (invalid_parameter is never assigned in
# the script)
'num' => 3
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa); store A into '$out';""")
Q = P.bind({'in':invalid_parameter, 'out':output})
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'expected_err_regex' => "ERROR 1121"
,'rc'=> 6
},
{
# compileFromFile for a pig script file that does not exist throws IOException
# NOTE(review): tmp_dir is never defined in the script, so the run presumably
# fails on that undefined name before compileFromFile() is reached -- confirm
# whether the missing-file path is actually exercised.
'num' => 4
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
# intentionally don't create pig script
pig_script = tmp_dir + "/script.pig"
#execute pig script
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1= ":OUTPATH:.1"
output2= ":OUTPATH:.2"
result = Pig.compileFromFile(pig_script).bind({'in1':input1,'in2':input2, 'out1':output1, 'out2':output2 }).run()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'expected_err_regex' => "ERROR 1121"
,'rc'=> 6
},
{
# compileFromFile for a pig script file that does not have read permissions throws IOException
# NOTE(review): nothing in the script removes read permission, and the file is
# closed before compileFromFile() is called (then closed a second time
# afterwards). The script also calls isSuccessful() on the value returned by
# run(). Confirm which of these actually produces the expected ERROR 1121.
'num' => 5
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w')
#no read permissions and file is left open until afer compile statement
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
store A into '$out1';
store B into '$out2';
""")
pigfile.close()
#execute pig script
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1= ":OUTPATH:.1"
output2= ":OUTPATH:.2"
result = Pig.compileFromFile(pig_script).bind({'in1':input1,'in2':input2, 'out1':output1, 'out2':output2 }).run()
pigfile.close()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'expected_err_regex' => "ERROR 1121"
,'rc'=> 6
},{
# Pig.compile() without importing the Pig class: the import line in the
# script is commented out, so the name Pig is never bound
'num' => 6
,'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
#from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'rc' => 6
,'expected_err_regex' => "ERROR 1121"
},
{
# 11.10 iter.next for an alias that is undefined
# NOTE(review): the script never uses an iterator. It calls bind() with no
# arguments while $out1 and $out2 have no matching Python variables, and then
# calls isSuccessful() on the value returned by run() -- confirm what this
# test is meant to cover.
'num' => 7
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 50;
store B into '$out1';
C = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
D = filter C by name matches '^fred*';
store D into '$out2';
""")
result = P.bind().run()
if result.isSuccessful():
print "Pig job PASSED"
else:
raise "Pig job FAILED"
\
,'rc' => 6
,'expected_err_regex' => "ERROR 1121"
},
]
},
{
# Non-dataflow commands issued from an embedded script: Pig.sql() and the
# Pig 'sh' command.
'name' => 'Jython_Command',
'tests' => [
{
# sql command: drop and re-create a table through Pig.sql(); the hcat binary
# location is passed as -Dhcat.bin=:HCATBIN:. Expects rc 0.
'num' => 1
,'java_params' => ['-Dhcat.bin=:HCATBIN:']
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
Pig.sql("""sql drop table if exists pig_script_hcat_ddl_1;""")
ret = Pig.sql("""sql create table pig_script_hcat_ddl_1(name string,
age int,
gpa double)
stored as textfile;
""")
if ret==0:
print "SQL command PASSED"
else:
raise "SQL command FAILED"
\
,'rc' => 0
},
{
# sh command that succeeds: 'sh echo mymessage'. Expects rc 0 and the echoed
# text immediately followed by the script's success line on stdout.
'num' => 2
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
Q = Pig.compile("sh echo mymessage")
result = Q.bind().runSingle()
if result.isSuccessful() :
print 'Pig job succeeded'
else :
raise 'Cant run sh command'
\
,'rc' => 0
,'expected_out_regex' => "mymessage\nPig job succeeded"
},
{
# sh command that fails: 'sh cat nonexistfile'. Expects rc 6.
'num' => 3
,'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
Q = Pig.compile("sh cat nonexistfile")
result = Q.bind().runSingle()
if result.isSuccessful() :
print 'Pig job succeeded'
else :
raise 'Cant run sh command'
\
,'rc' => 6
}
]
}
]
}
;