// branch-2.0.4-alpha/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingStatus.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.streaming;

 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.File;

 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import static org.junit.Assert.*;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.Counters;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MiniMRCluster;
 import org.apache.hadoop.mapred.TaskAttemptID;
 import org.apache.hadoop.mapred.TaskID;
 import org.apache.hadoop.mapred.TaskLog;
 import org.apache.hadoop.mapred.TaskReport;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.MapReduceTestUtil;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;


 /**
  * Tests if mapper/reducer with empty/nonempty input works properly if
  * reporting is done using lines like "reporter:status:" and
  * "reporter:counter:" before map()/reduce() method is called.
  * Validates the task's log of STDERR if messages are written to stderr before
  * map()/reduce() is called.
  * Also validates job output.
  * Uses MiniMR since the local jobtracker doesn't track task status.
  */
 public class TestStreamingStatus {
   protected static String TEST_ROOT_DIR =
     new File(System.getProperty("test.build.data","/tmp"),
     TestStreamingStatus.class.getSimpleName())
     .toURI().toString().replace(' ', '+');
   protected String INPUT_FILE = TEST_ROOT_DIR + "/input.txt";
   protected String OUTPUT_DIR = TEST_ROOT_DIR + "/out";
   protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
   protected String map = null;
   protected String reduce = null;

   protected String scriptFile = TEST_ROOT_DIR + "/perlScript.pl";
   protected String scriptFileName = new Path(scriptFile).toUri().getPath();


   String expectedStderr = "my error msg before consuming input\n" +
       "my error msg after consuming input\n";
   String expectedOutput = null;// inited in setUp()
   String expectedStatus = "before consuming input";

   // This script does the following
   // (a) setting task status before reading input
   // (b) writing to stderr before reading input and after reading input
   // (c) writing to stdout before reading input
   // (d) incrementing user counter before reading input and after reading input
   // Write lines to stdout before reading input{(c) above} is to validate
   // the hanging task issue when input to task is empty(because of not starting
   // output thread).
   protected String script =
     "#!/usr/bin/perl\n" +
     "print STDERR \"reporter:status:" + expectedStatus + "\\n\";\n" +
     "print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n" +
     "print STDERR \"my error msg before consuming input\\n\";\n" +
     "for($count = 1500; $count >= 1; $count--) {print STDOUT \"$count \";}" +
     "while(<STDIN>) {chomp;}\n" +
     "print STDERR \"my error msg after consuming input\\n\";\n" +
     "print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n";

   MiniMRCluster mr = null;
   FileSystem fs = null;
   JobConf conf = null;

   /**
    * Start the cluster and create input file before running the actual test.
    *
    * @throws IOException
    */
   @Before
   public void setUp() throws IOException {
     conf = new JobConf();
     conf.setBoolean(JTConfig.JT_RETIREJOBS, false);
     conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false);

     mr = new MiniMRCluster(1, "file:///", 3, null , null, conf);

     Path inFile = new Path(INPUT_FILE);
     fs = inFile.getFileSystem(mr.createJobConf());
     clean(fs);

     buildExpectedJobOutput();
   }

   /**
    * Kill the cluster after the test is done.
    */
   @After
   public void tearDown() {
     if (fs != null) { clean(fs); }
     if (mr != null) { mr.shutdown(); }
   }

   // Updates expectedOutput to have the expected job output as a string
   void buildExpectedJobOutput() {
     if (expectedOutput == null) {
       expectedOutput = "";
       for(int i = 1500; i >= 1; i--) {
         expectedOutput = expectedOutput.concat(Integer.toString(i) + " ");
       }
       expectedOutput = expectedOutput.trim();
     }
   }

   // Create empty/nonempty input file.
   // Create script file with the specified content.
   protected void createInputAndScript(boolean isEmptyInput,
       String script) throws IOException {
     makeInput(fs, isEmptyInput ? "" : input);

     // create script file
     DataOutputStream file = fs.create(new Path(scriptFileName));
     file.writeBytes(script);
     file.close();
   }

   protected String[] genArgs(String jobtracker, String mapper, String reducer)
   {
     return new String[] {
       "-input", INPUT_FILE,
       "-output", OUTPUT_DIR,
       "-mapper", mapper,
       "-reducer", reducer,
       "-jobconf", MRJobConfig.NUM_MAPS + "=1",
       "-jobconf", MRJobConfig.NUM_REDUCES + "=1",
       "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
       "-jobconf", "stream.tmpdir=" + new Path(TEST_ROOT_DIR).toUri().getPath(),
       "-jobconf", JTConfig.JT_IPC_ADDRESS + "="+jobtracker,
       "-jobconf", "fs.default.name=file:///",
       "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR,
       "-jobconf", "mapreduce.framework.name=yarn"
     };
   }

   // create input file with the given content
   public void makeInput(FileSystem fs, String input) throws IOException {
     Path inFile = new Path(INPUT_FILE);
     DataOutputStream file = fs.create(inFile);
     file.writeBytes(input);
     file.close();
   }

   // Delete output directory
   protected void deleteOutDir(FileSystem fs) {
     try {
       Path outDir = new Path(OUTPUT_DIR);
       fs.delete(outDir, true);
     } catch (Exception e) {}
   }

   // Delete input file, script file and output directory
   public void clean(FileSystem fs) {
     deleteOutDir(fs);
     try {
       Path file = new Path(INPUT_FILE);
       if (fs.exists(file)) {
         fs.delete(file, false);
       }
       file = new Path(scriptFile);
       if (fs.exists(file)) {
         fs.delete(file, false);
       }
     } catch (Exception e) {
       e.printStackTrace();
     }
   }

   /**
    * Check if mapper/reducer with empty/nonempty input works properly if
    * reporting is done using lines like "reporter:status:" and
    * "reporter:counter:" before map()/reduce() method is called.
    * Validate the task's log of STDERR if messages are written
    * to stderr before map()/reduce() is called.
    * Also validate job output.
    *
    * @throws IOException
    */
   @Test
   public void testReporting() throws Exception {
     testStreamJob(false);// nonempty input
     testStreamJob(true);// empty input
   }

   /**
    * Run a streaming job with the given script as mapper and validate.
    * Run another streaming job with the given script as reducer and validate.
    *
    * @param isEmptyInput Should the input to the script be empty ?
    * @param script The content of the script that will run as the streaming task
    */
   private void testStreamJob(boolean isEmptyInput)
       throws IOException {

       createInputAndScript(isEmptyInput, script);

       // Check if streaming mapper works as expected
       map = scriptFileName;
       reduce = "/bin/cat";
       runStreamJob(TaskType.MAP, isEmptyInput);
       deleteOutDir(fs);

       // Check if streaming reducer works as expected.
       map = "/bin/cat";
       reduce = scriptFileName;
       runStreamJob(TaskType.REDUCE, isEmptyInput);
       clean(fs);
   }

   // Run streaming job for the specified input file, mapper and reducer and
   // (1) Validate if the job succeeds.
   // (2) Validate if user counter is incremented properly for the cases of
   //   (a) nonempty input to map
   //   (b) empty input to map and
   //   (c) nonempty input to reduce
   // (3) Validate task status for the cases of (2)(a),(2)(b),(2)(c).
   //     Because empty input to reduce task => reporter is dummy and ignores
   //     all "reporter:status" and "reporter:counter" lines.
   // (4) Validate stderr of task of given task type.
   // (5) Validate job output
   void runStreamJob(TaskType type, boolean isEmptyInput) throws IOException {
     boolean mayExit = false;
     StreamJob job = new StreamJob(genArgs(
         mr.createJobConf().get(JTConfig.JT_IPC_ADDRESS), map, reduce), mayExit);
     int returnValue = job.go();
     assertEquals(0, returnValue);

     // If input to reducer is empty, dummy reporter(which ignores all
     // reporting lines) is set for MRErrorThread in waitOutputThreads(). So
     // expectedCounterValue is 0 for empty-input-to-reducer case.
     // Output of reducer is also empty for empty-input-to-reducer case.
     int expectedCounterValue = 0;
     if (type == TaskType.MAP || !isEmptyInput) {
       validateTaskStatus(job, type);
       // output is from "print STDOUT" statements in perl script
       validateJobOutput(job.getConf());
       expectedCounterValue = 2;
     }
     validateUserCounter(job, expectedCounterValue);
     validateTaskStderr(job, type);

     deleteOutDir(fs);
   }

   // validate task status of task of given type(validates 1st task of that type)
   void validateTaskStatus(StreamJob job, TaskType type) throws IOException {
     // Map Task has 2 phases: map, sort
     // Reduce Task has 3 phases: copy, sort, reduce
     String finalPhaseInTask;
     TaskReport[] reports;
     if (type == TaskType.MAP) {
       reports = job.jc_.getMapTaskReports(job.jobId_);
       finalPhaseInTask = "sort";
     } else {// reduce task
       reports = job.jc_.getReduceTaskReports(job.jobId_);
       finalPhaseInTask = "reduce";
     }
     assertEquals(1, reports.length);
     assertEquals(expectedStatus + " > " + finalPhaseInTask,
         reports[0].getState());
   }

   // Validate the job output
   void validateJobOutput(Configuration conf)
       throws IOException {

     String output = MapReduceTestUtil.readOutput(
         new Path(OUTPUT_DIR), conf).trim();

     assertTrue(output.equals(expectedOutput));
   }

   // Validate stderr task log of given task type(validates 1st
   // task of that type).
   void validateTaskStderr(StreamJob job, TaskType type)
       throws IOException {
     TaskAttemptID attemptId =
         new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);

     String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
         attemptId, false);

     // trim() is called on expectedStderr here because the method
     // MapReduceTestUtil.readTaskLog() returns trimmed String.
     assertTrue(log.equals(expectedStderr.trim()));
   }

   // Validate if user counter is incremented properly
   void validateUserCounter(StreamJob job, int expectedCounterValue)
       throws IOException {
     Counters counters = job.running_.getCounters();
     assertEquals(expectedCounterValue, counters.findCounter(
         "myOwnCounterGroup", "myOwnCounter").getValue());
   }

 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.streaming;

	import java.io.DataOutputStream;
	import java.io.IOException;
	import java.io.File;

	import org.junit.After;
	import org.junit.Before;
	import org.junit.Test;
	import static org.junit.Assert.*;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.mapred.Counters;
	import org.apache.hadoop.mapred.JobConf;
	import org.apache.hadoop.mapred.MiniMRCluster;
	import org.apache.hadoop.mapred.TaskAttemptID;
	import org.apache.hadoop.mapred.TaskID;
	import org.apache.hadoop.mapred.TaskLog;
	import org.apache.hadoop.mapred.TaskReport;
	import org.apache.hadoop.mapreduce.MRJobConfig;
	import org.apache.hadoop.mapreduce.TaskType;
	import org.apache.hadoop.mapreduce.MapReduceTestUtil;
	import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;


	/**
	* Tests if mapper/reducer with empty/nonempty input works properly if
	* reporting is done using lines like "reporter:status:" and
	* "reporter:counter:" before map()/reduce() method is called.
	* Validates the task's log of STDERR if messages are written to stderr before
	* map()/reduce() is called.
	* Also validates job output.
	* Uses MiniMR since the local jobtracker doesn't track task status.
	*/
	public class TestStreamingStatus {
	protected static String TEST_ROOT_DIR =
	new File(System.getProperty("test.build.data","/tmp"),
	TestStreamingStatus.class.getSimpleName())
	.toURI().toString().replace(' ', '+');
	protected String INPUT_FILE = TEST_ROOT_DIR + "/input.txt";
	protected String OUTPUT_DIR = TEST_ROOT_DIR + "/out";
	protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
	protected String map = null;
	protected String reduce = null;

	protected String scriptFile = TEST_ROOT_DIR + "/perlScript.pl";
	protected String scriptFileName = new Path(scriptFile).toUri().getPath();


	String expectedStderr = "my error msg before consuming input\n" +
	"my error msg after consuming input\n";
	String expectedOutput = null;// inited in setUp()
	String expectedStatus = "before consuming input";

	// This script does the following
	// (a) setting task status before reading input
	// (b) writing to stderr before reading input and after reading input
	// (c) writing to stdout before reading input
	// (d) incrementing user counter before reading input and after reading input
	// Write lines to stdout before reading input{(c) above} is to validate
	// the hanging task issue when input to task is empty(because of not starting
	// output thread).
	protected String script =
	"#!/usr/bin/perl\n" +
	"print STDERR \"reporter:status:" + expectedStatus + "\\n\";\n" +
	"print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n" +
	"print STDERR \"my error msg before consuming input\\n\";\n" +
	"for($count = 1500; $count >= 1; $count--) {print STDOUT \"$count \";}" +
	"while(<STDIN>) {chomp;}\n" +
	"print STDERR \"my error msg after consuming input\\n\";\n" +
	"print STDERR \"reporter:counter:myOwnCounterGroup,myOwnCounter,1\\n\";\n";

	MiniMRCluster mr = null;
	FileSystem fs = null;
	JobConf conf = null;

	/**
	* Start the cluster and create input file before running the actual test.
	*
	* @throws IOException
	*/
	@Before
	public void setUp() throws IOException {
	conf = new JobConf();
	conf.setBoolean(JTConfig.JT_RETIREJOBS, false);
	conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false);

	mr = new MiniMRCluster(1, "file:///", 3, null , null, conf);

	Path inFile = new Path(INPUT_FILE);
	fs = inFile.getFileSystem(mr.createJobConf());
	clean(fs);

	buildExpectedJobOutput();
	}

	/**
	* Kill the cluster after the test is done.
	*/
	@After
	public void tearDown() {
	if (fs != null) { clean(fs); }
	if (mr != null) { mr.shutdown(); }
	}

	// Updates expectedOutput to have the expected job output as a string
	void buildExpectedJobOutput() {
	if (expectedOutput == null) {
	expectedOutput = "";
	for(int i = 1500; i >= 1; i--) {
	expectedOutput = expectedOutput.concat(Integer.toString(i) + " ");
	}
	expectedOutput = expectedOutput.trim();
	}
	}

	// Create empty/nonempty input file.
	// Create script file with the specified content.
	protected void createInputAndScript(boolean isEmptyInput,
	String script) throws IOException {
	makeInput(fs, isEmptyInput ? "" : input);

	// create script file
	DataOutputStream file = fs.create(new Path(scriptFileName));
	file.writeBytes(script);
	file.close();
	}

	protected String[] genArgs(String jobtracker, String mapper, String reducer)
	{
	return new String[] {
	"-input", INPUT_FILE,
	"-output", OUTPUT_DIR,
	"-mapper", mapper,
	"-reducer", reducer,
	"-jobconf", MRJobConfig.NUM_MAPS + "=1",
	"-jobconf", MRJobConfig.NUM_REDUCES + "=1",
	"-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
	"-jobconf", "stream.tmpdir=" + new Path(TEST_ROOT_DIR).toUri().getPath(),
	"-jobconf", JTConfig.JT_IPC_ADDRESS + "="+jobtracker,
	"-jobconf", "fs.default.name=file:///",
	"-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR,
	"-jobconf", "mapreduce.framework.name=yarn"
	};
	}

	// create input file with the given content
	public void makeInput(FileSystem fs, String input) throws IOException {
	Path inFile = new Path(INPUT_FILE);
	DataOutputStream file = fs.create(inFile);
	file.writeBytes(input);
	file.close();
	}

	// Delete output directory
	protected void deleteOutDir(FileSystem fs) {
	try {
	Path outDir = new Path(OUTPUT_DIR);
	fs.delete(outDir, true);
	} catch (Exception e) {}
	}

	// Delete input file, script file and output directory
	public void clean(FileSystem fs) {
	deleteOutDir(fs);
	try {
	Path file = new Path(INPUT_FILE);
	if (fs.exists(file)) {
	fs.delete(file, false);
	}
	file = new Path(scriptFile);
	if (fs.exists(file)) {
	fs.delete(file, false);
	}
	} catch (Exception e) {
	e.printStackTrace();
	}
	}

	/**
	* Check if mapper/reducer with empty/nonempty input works properly if
	* reporting is done using lines like "reporter:status:" and
	* "reporter:counter:" before map()/reduce() method is called.
	* Validate the task's log of STDERR if messages are written
	* to stderr before map()/reduce() is called.
	* Also validate job output.
	*
	* @throws IOException
	*/
	@Test
	public void testReporting() throws Exception {
	testStreamJob(false);// nonempty input
	testStreamJob(true);// empty input
	}

	/**
	* Run a streaming job with the given script as mapper and validate.
	* Run another streaming job with the given script as reducer and validate.
	*
	* @param isEmptyInput Should the input to the script be empty ?
	* @param script The content of the script that will run as the streaming task
	*/
	private void testStreamJob(boolean isEmptyInput)
	throws IOException {

	createInputAndScript(isEmptyInput, script);

	// Check if streaming mapper works as expected
	map = scriptFileName;
	reduce = "/bin/cat";
	runStreamJob(TaskType.MAP, isEmptyInput);
	deleteOutDir(fs);

	// Check if streaming reducer works as expected.
	map = "/bin/cat";
	reduce = scriptFileName;
	runStreamJob(TaskType.REDUCE, isEmptyInput);
	clean(fs);
	}

	// Run streaming job for the specified input file, mapper and reducer and
	// (1) Validate if the job succeeds.
	// (2) Validate if user counter is incremented properly for the cases of
	// (a) nonempty input to map
	// (b) empty input to map and
	// (c) nonempty input to reduce
	// (3) Validate task status for the cases of (2)(a),(2)(b),(2)(c).
	// Because empty input to reduce task => reporter is dummy and ignores
	// all "reporter:status" and "reporter:counter" lines.
	// (4) Validate stderr of task of given task type.
	// (5) Validate job output
	void runStreamJob(TaskType type, boolean isEmptyInput) throws IOException {
	boolean mayExit = false;
	StreamJob job = new StreamJob(genArgs(
	mr.createJobConf().get(JTConfig.JT_IPC_ADDRESS), map, reduce), mayExit);
	int returnValue = job.go();
	assertEquals(0, returnValue);

	// If input to reducer is empty, dummy reporter(which ignores all
	// reporting lines) is set for MRErrorThread in waitOutputThreads(). So
	// expectedCounterValue is 0 for empty-input-to-reducer case.
	// Output of reducer is also empty for empty-input-to-reducer case.
	int expectedCounterValue = 0;
	if (type == TaskType.MAP \|\| !isEmptyInput) {
	validateTaskStatus(job, type);
	// output is from "print STDOUT" statements in perl script
	validateJobOutput(job.getConf());
	expectedCounterValue = 2;
	}
	validateUserCounter(job, expectedCounterValue);
	validateTaskStderr(job, type);

	deleteOutDir(fs);
	}

	// validate task status of task of given type(validates 1st task of that type)
	void validateTaskStatus(StreamJob job, TaskType type) throws IOException {
	// Map Task has 2 phases: map, sort
	// Reduce Task has 3 phases: copy, sort, reduce
	String finalPhaseInTask;
	TaskReport[] reports;
	if (type == TaskType.MAP) {
	reports = job.jc_.getMapTaskReports(job.jobId_);
	finalPhaseInTask = "sort";
	} else {// reduce task
	reports = job.jc_.getReduceTaskReports(job.jobId_);
	finalPhaseInTask = "reduce";
	}
	assertEquals(1, reports.length);
	assertEquals(expectedStatus + " > " + finalPhaseInTask,
	reports[0].getState());
	}

	// Validate the job output
	void validateJobOutput(Configuration conf)
	throws IOException {

	String output = MapReduceTestUtil.readOutput(
	new Path(OUTPUT_DIR), conf).trim();

	assertTrue(output.equals(expectedOutput));
	}

	// Validate stderr task log of given task type(validates 1st
	// task of that type).
	void validateTaskStderr(StreamJob job, TaskType type)
	throws IOException {
	TaskAttemptID attemptId =
	new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);

	String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
	attemptId, false);

	// trim() is called on expectedStderr here because the method
	// MapReduceTestUtil.readTaskLog() returns trimmed String.
	assertTrue(log.equals(expectedStderr.trim()));
	}

	// Validate if user counter is incremented properly
	void validateUserCounter(StreamJob job, int expectedCounterValue)
	throws IOException {
	Counters counters = job.running_.getCounters();
	assertEquals(expectedCounterValue, counters.findCounter(
	"myOwnCounterGroup", "myOwnCounter").getValue());
	}

	}