// blob: 803e774b085800de5b30d8195d26483c24d5c4e3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import junit.framework.Assert;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.PigStats.JobGraph;
import org.apache.pig.tools.pigstats.ScriptState;
import org.junit.Test;
/**
 * Tests for the script metadata and job statistics exposed through
 * {@link ScriptState} and {@link PigStats}.
 *
 * <p>The tests cover: propagation of the submitted script into the job
 * configuration, the bytes-written counter of a store, the aliases attached to
 * each map-reduce operator, and the size of the job graph.
 *
 * <p>The static helpers at the bottom reach into non-public methods through
 * reflection and are public so other tests can reuse them.
 */
public class TestPigStats {

    private static final Log LOG = LogFactory.getLog(TestPigStats.class);

    /** Configuration key from which the Base64-encoded script is read back. */
    private static final String PIG_SCRIPT_KEY = "pig.script";

    /**
     * Writes a small Pig script to disk, registers it with the script state and
     * verifies that the text decoded from the job configuration matches it.
     */
    @Test
    public void testPigScriptInConf() throws Exception {
        File scriptFile = new File("test.pig");
        try {
            PrintWriter w = new PrintWriter(new FileWriter(scriptFile));
            try {
                w.println("register /mydir/sath.jar");
                w.println("register /mydir/lib/hadoop-tools-0.20.201.0-SNAPSHOT.jar");
                w.println("register /mydir/lib/jackson-core-asl-1.4.2.jar");
                w.println("register /mydir/lib/jackson-mapper-asl-1.4.2.jar");
            } finally {
                // Always release the file handle, even if writing fails.
                w.close();
            }

            String script = readScriptFromConf(scriptFile);

            String expected =
                "register /mydir/sath.jar\n" +
                "register /mydir/lib/hadoop-tools-0.20.201.0-SNAPSHOT.jar\n" +
                "register /mydir/lib/jackson-core-asl-1.4.2.jar\n" +
                "register /mydir/lib/jackson-mapper-asl-1.4.2.jar\n";
            assertEquals(expected, script);
        } finally {
            // Do not leave the generated script behind in the working directory.
            deleteFile(scriptFile);
        }
    }

    /**
     * Same round trip as {@link #testPigScriptInConf()}, but for an embedded
     * Jython script created through {@code Util.createLocalInputFile}.
     */
    @Test
    public void testJythonScriptInConf() throws Exception {
        String[] script = {
            "#!/usr/bin/python",
            "from org.apache.pig.scripting import *",
            "Pig.fs(\"rmr simple_out\")",
            "input = 'simple_table'",
            "output = 'simple_out'",
            "P = Pig.compile(\"\"\"a = load '$input';store a into '$output';\"\"\")",
            "Q = P.bind({'input':input, 'output':output})",
            "stats = Q.runSingle()",
            "if stats.isSuccessful():",
            "\tprint 'success!'",
            "else:",
            "\traise 'failed'"
        };
        File scriptFile = new File("testScript.py");
        try {
            Util.createLocalInputFile("testScript.py", script);

            String actual = readScriptFromConf(scriptFile);

            String expected =
                "#!/usr/bin/python\n" +
                "from org.apache.pig.scripting import *\n" +
                "Pig.fs(\"rmr simple_out\")\n" +
                "input = 'simple_table'\n" +
                "output = 'simple_out'\n" +
                "P = Pig.compile(\"\"\"a = load '$input';store a into '$output';\"\"\")\n" +
                "Q = P.bind({'input':input, 'output':output})\n" +
                "stats = Q.runSingle()\n" +
                "if stats.isSuccessful():\n" +
                "\tprint 'success!'\n" +
                "else:\n" +
                "\traise 'failed'\n";
            assertEquals(expected, actual);
        } finally {
            deleteFile(scriptFile);
        }
    }

    /**
     * Stores a relation in local mode and checks that the bytes-written
     * statistic equals the length of the {@code part-00000} file in the output
     * directory.
     */
    @Test
    public void testBytesWritten_JIRA_1027() {
        File outputFile = null;
        try {
            String fileName = this.getClass().getName() + "_" + "testBytesWritten_JIRA_1027";
            outputFile = File.createTempFile(fileName, ".out");
            String filePath = outputFile.getAbsolutePath();
            // Only the unique name is needed; the store must create the path itself.
            if (!outputFile.delete()) {
                fail("Could not delete temporary file " + filePath);
            }

            PigServer pig = new PigServer(ExecType.LOCAL);
            pig.registerQuery("A = load 'test/org/apache/pig/test/data/passwd';");
            ExecJob job = pig.store("A", filePath);
            PigStats stats = job.getStatistics();

            // NOTE(review): File.length() returns 0 for a path that does not
            // exist, so this comparison cannot tell a missing part file apart
            // from an empty one - confirm the expected part-file name.
            File dataFile = new File(outputFile.getAbsoluteFile() + File.separator + "part-00000");
            assertEquals(dataFile.length(), stats.getBytesWritten());
        } catch (IOException e) {
            LOG.error("Error while generating file", e);
            throw ioFailure(e);
        } finally {
            if (outputFile != null) {
                // Hadoop Local mode creates a directory
                // Hence we need to delete a directory recursively
                deleteDirectory(outputFile);
            }
        }
    }

    /**
     * Compiles a load/group/foreach/order/limit/store script down to a
     * map-reduce plan and checks the number of operators and the aliases
     * reported for the first three operators along the chain.
     */
    @Test
    public void testPigStatsAlias() throws Exception {
        try {
            PigServer pig = new PigServer(ExecType.LOCAL);
            pig.setBatchOn();
            pig.registerQuery("A = load 'input' as (name, age, gpa);");
            pig.registerQuery("B = group A by name;");
            pig.registerQuery("C = foreach B generate group, COUNT(A);");
            pig.registerQuery("D = order C by $1;");
            pig.registerQuery("E = limit D 10;");
            pig.registerQuery("store E into 'alias_output';");

            LogicalPlan lp = getLogicalPlan(pig);
            PhysicalPlan pp = pig.getPigContext().getExecutionEngine().compile(lp, null);
            MROperPlan mp = getMRPlan(pp, pig.getPigContext());
            assertEquals(4, mp.getKeys().size());

            MapReduceOper mro = mp.getRoots().get(0);
            assertEquals("A,B,C", getAlias(mro));

            mro = mp.getSuccessors(mro).get(0);
            assertEquals("D", getAlias(mro));

            mro = mp.getSuccessors(mro).get(0);
            assertEquals("D", getAlias(mro));
        } finally {
            // Hadoop Local mode creates a directory
            // Hence we need to delete a directory recursively
            // (deleteDirectory is a no-op when the path does not exist).
            deleteDirectory(new File("alias_output"));
        }
    }

    /**
     * Runs a script with two group-by stages in local mode and checks that the
     * resulting job graph lists two jobs.
     */
    @Test
    public void testPigStatsGetList() {
        File outputFile = null;
        try {
            String filename = this.getClass().getSimpleName() + "_" + "testPigStatsGetList";
            outputFile = File.createTempFile(filename, ".out");
            String filePath = outputFile.getAbsolutePath();
            // Only the unique name is needed; the store must create the path itself.
            if (!outputFile.delete()) {
                fail("Could not delete temporary file " + filePath);
            }

            PigServer pigServer = new PigServer(ExecType.LOCAL);
            pigServer.registerQuery("a = load 'test/org/apache/pig/test/data/passwd';");
            pigServer.registerQuery("b = group a by $0;");
            pigServer.registerQuery("c = foreach b generate group, COUNT(a) as cnt;");
            pigServer.registerQuery("d = group c by cnt;");
            pigServer.registerQuery("e = foreach d generate group;");
            ExecJob job = pigServer.store("e", filePath);

            JobGraph jobGraph = job.getStatistics().getJobGraph();
            assertEquals(2, jobGraph.getJobList().size());
        } catch (IOException e) {
            LOG.error("IOException while creating file ", e);
            throw ioFailure(e);
        } finally {
            if (outputFile != null) {
                // delete the directory before returning
                deleteDirectory(outputFile);
            }
        }
    }

    /**
     * Registers {@code scriptFile} with the current {@link ScriptState}, pushes
     * the settings into a fresh {@link Configuration} and returns the decoded
     * value stored under {@code pig.script}.
     *
     * @param scriptFile script file to register
     * @return the Base64-decoded script text read back from the configuration
     * @throws Exception if the script state fails to populate the configuration
     */
    private static String readScriptFromConf(File scriptFile) throws Exception {
        ScriptState ss = ScriptState.get();
        ss.setScript(scriptFile);
        Configuration conf = new Configuration();
        MapReduceOper mro = new MapReduceOper(new OperatorKey());
        ss.addSettingsToConf(mro, conf);
        String encoded = conf.get(PIG_SCRIPT_KEY);
        if (encoded == null) {
            // Fail with a readable message instead of a NullPointerException.
            fail(PIG_SCRIPT_KEY + " was not set in the configuration");
        }
        return new String(Base64.decodeBase64(encoded.getBytes()));
    }

    /**
     * Builds a test failure that keeps the original exception as its cause so
     * the stack trace shows up in the test report, not only in the log.
     *
     * @param cause the I/O error that aborted the test
     * @return an {@link AssertionError} wrapping {@code cause}
     */
    private static AssertionError ioFailure(IOException cause) {
        AssertionError failure = new AssertionError("Encountered IOException: " + cause);
        failure.initCause(cause);
        return failure;
    }

    /**
     * Best-effort removal of a single file; a failure is logged, never thrown,
     * so cleanup cannot mask the outcome of the test itself.
     *
     * @param file file to remove; a non-existent path is ignored
     */
    private static void deleteFile(File file) {
        if (file.exists() && !file.delete()) {
            LOG.error("Could not delete file " + file);
        }
    }

    /**
     * Best-effort recursive removal of a test output location.
     *
     * <p>A path that does not exist is ignored, and a plain file is removed
     * directly rather than being handed to {@link FileUtils#deleteDirectory}.
     *
     * @param dir directory (or leftover file) to remove
     */
    private void deleteDirectory(File dir) {
        if (!dir.exists()) {
            return;
        }
        if (!dir.isDirectory()) {
            deleteFile(dir);
            return;
        }
        try {
            FileUtils.deleteDirectory(dir);
        } catch (IOException e) {
            LOG.error("Could not delete directory " + dir, e);
        }
    }

    /**
     * Invokes the non-public {@code buildLp} method of {@link PigServer}
     * through reflection.
     *
     * @param pig server whose registered queries should be turned into a plan
     * @return the logical plan returned by {@code buildLp}
     * @throws Exception if the method cannot be found or its invocation fails
     */
    public static LogicalPlan getLogicalPlan(PigServer pig) throws Exception {
        // Look the method up on PigServer itself so subclasses of PigServer work too.
        java.lang.reflect.Method buildLp = PigServer.class.getDeclaredMethod("buildLp");
        buildLp.setAccessible(true);
        return (LogicalPlan) buildLp.invoke(pig);
    }

    /**
     * Invokes the non-public {@code compile(PhysicalPlan, PigContext)} method of
     * {@link MapReduceLauncher} through reflection.
     *
     * @param pp  physical plan to compile
     * @param ctx Pig context passed through to the launcher
     * @return the map-reduce operator plan produced by the launcher
     * @throws Exception if the method cannot be found or its invocation fails
     */
    public static MROperPlan getMRPlan(PhysicalPlan pp, PigContext ctx) throws Exception {
        MapReduceLauncher launcher = new MapReduceLauncher();
        java.lang.reflect.Method compile = MapReduceLauncher.class
                .getDeclaredMethod("compile", PhysicalPlan.class, PigContext.class);
        compile.setAccessible(true);
        return (MROperPlan) compile.invoke(launcher, pp, ctx);
    }

    /**
     * Invokes the non-public {@code getAlias(MapReduceOper)} method on the
     * current {@link ScriptState} through reflection.
     *
     * @param mro map-reduce operator to inspect
     * @return the alias string reported for {@code mro}
     * @throws Exception if the method cannot be found or its invocation fails
     */
    public static String getAlias(MapReduceOper mro) throws Exception {
        ScriptState ss = ScriptState.get();
        java.lang.reflect.Method getAlias = ss.getClass()
                .getDeclaredMethod("getAlias", MapReduceOper.class);
        getAlias.setAccessible(true);
        return (String) getAlias.invoke(ss, mro);
    }
}