hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/EventFetcher.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.hadoop.mapreduce.task.reduce;

 import java.io.IOException;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
 import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapreduce.TaskAttemptID;

 /**
  * Daemon thread that repeatedly asks the task umbilical for new
  * map-completion events and passes every event to the
  * {@link ShuffleScheduler}.
  *
  * <p>The thread runs until {@link #shutDown()} is called, it is interrupted,
  * or {@link #MAX_RETRIES} consecutive polls fail with an
  * {@link IOException}; in the last case the failure is handed to the
  * {@link ExceptionReporter}.
  *
  * @param <K> key type of the {@link ShuffleScheduler} this fetcher feeds
  * @param <V> value type of the {@link ShuffleScheduler} this fetcher feeds
  */
 class EventFetcher<K,V> extends Thread {
   /** Pause between two successful polls, in milliseconds. */
   private static final long SLEEP_TIME = 1000;
   /** Number of consecutive failed polls after which the thread gives up. */
   private static final int MAX_RETRIES = 10;
   /** Pause after a failed poll before trying again, in milliseconds. */
   private static final int RETRY_PERIOD = 5000;
   private static final Log LOG = LogFactory.getLog(EventFetcher.class);

   private final TaskAttemptID reduce;
   private final TaskUmbilicalProtocol umbilical;
   private final ShuffleScheduler<K,V> scheduler;
   /** Index of the next event to request; advanced by every batch received. */
   private int fromEventIdx = 0;
   /** Maximum number of events requested per umbilical call. */
   private final int maxEventsToFetch;
   private final ExceptionReporter exceptionReporter;

   /** Set by {@link #shutDown()}; volatile because it is read by this thread. */
   private volatile boolean stopped = false;

   /**
    * Creates the fetcher as a named daemon thread. The thread is not started.
    *
    * @param reduce attempt on whose behalf events are requested
    * @param umbilical protocol handle used to request the events
    * @param scheduler receiver of every fetched event
    * @param reporter receiver of any fatal error raised in {@link #run()}
    * @param maxEventsToFetch maximum number of events requested per call
    */
   public EventFetcher(TaskAttemptID reduce,
                       TaskUmbilicalProtocol umbilical,
                       ShuffleScheduler<K,V> scheduler,
                       ExceptionReporter reporter,
                       int maxEventsToFetch) {
     setName("EventFetcher for fetching Map Completion Events");
     setDaemon(true);
     this.reduce = reduce;
     this.umbilical = umbilical;
     this.scheduler = scheduler;
     exceptionReporter = reporter;
     this.maxEventsToFetch = maxEventsToFetch;
   }

   /**
    * Polls for map-completion events until stopped or interrupted.
    *
    * <p>A successful poll resets the failure counter and is followed by a
    * {@link #SLEEP_TIME} pause. A poll that fails with an {@link IOException}
    * is retried after {@link #RETRY_PERIOD}; after {@link #MAX_RETRIES}
    * consecutive failures the error is passed to the exception reporter.
    * Failures seen after {@link #shutDown()} was requested are not counted
    * and not reported.
    */
   @Override
   public void run() {
     int failures = 0;
     LOG.info(reduce + " Thread started: " + getName());

     try {
       while (!stopped && !Thread.currentThread().isInterrupted()) {
         try {
           int numNewMaps = getMapCompletionEvents();
           failures = 0;
           if (numNewMaps > 0) {
             LOG.info(reduce + ": " + "Got " + numNewMaps + " new map-outputs");
           }
           if (LOG.isDebugEnabled()) {
             LOG.debug("GetMapEventsThread about to sleep for " + SLEEP_TIME);
           }
           if (!Thread.currentThread().isInterrupted()) {
             Thread.sleep(SLEEP_TIME);
           }
         } catch (InterruptedException e) {
           LOG.info("EventFetcher is interrupted.. Returning");
           return;
         } catch (IOException ie) {
           if (stopped) {
             // shutDown() sets the flag and then interrupts this thread, so an
             // in-flight poll may fail as a side effect of the shutdown
             // itself. Counting that as a fetch failure could report a
             // spurious fatal error or keep the thread asleep for
             // RETRY_PERIOD while shutDown() is waiting in join().
             LOG.info("EventFetcher is stopped.. Returning");
             return;
           }
           LOG.info("Exception in getting events", ie);
           // check to see whether to abort
           if (++failures >= MAX_RETRIES) {
             throw new IOException("too many failures downloading events", ie);
           }
           // sleep for a bit
           if (!Thread.currentThread().isInterrupted()) {
             Thread.sleep(RETRY_PERIOD);
           }
         }
       }
     } catch (InterruptedException e) {
       // Interrupted during the retry pause: exit without reporting.
       return;
     } catch (Throwable t) {
       exceptionReporter.reportException(t);
       return;
     }
   }

   /**
    * Asks the thread to stop: sets the stop flag, interrupts the thread and
    * waits up to five seconds for it to finish.
    *
    * <p>If the calling thread is interrupted while waiting, the interruption
    * is logged and the method returns.
    */
   public void shutDown() {
     this.stopped = true;
     interrupt();
     try {
       join(5000);
     } catch(InterruptedException ie) {
       // NOTE(review): the caller's interrupt status is not restored here;
       // confirm what callers expect before changing that.
       LOG.warn("Got interrupted while joining " + getName(), ie);
     }
   }

   /**
    * Requests map-completion events from the umbilical, starting at the last
    * seen event index, and passes each one to the scheduler.
    *
    * <p>Events are requested in batches of at most {@code maxEventsToFetch};
    * a full batch triggers another request, so the method returns once a
    * batch comes back with a different size.
    *
    * @return number of fetched events whose status is {@code SUCCEEDED}
    * @throws IOException if requesting or resolving the events fails
    * @throws InterruptedException if the thread is interrupted meanwhile
    */
   protected int getMapCompletionEvents()
       throws IOException, InterruptedException {

     int numNewMaps = 0;
     TaskCompletionEvent[] events;

     do {
       MapTaskCompletionEventsUpdate update =
           umbilical.getMapCompletionEvents(
               (org.apache.hadoop.mapred.JobID)reduce.getJobID(),
               fromEventIdx,
               maxEventsToFetch,
               (org.apache.hadoop.mapred.TaskAttemptID)reduce);
       events = update.getMapTaskCompletionEvents();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Got " + events.length + " map completion events from " +
                  fromEventIdx);
       }

       assert !update.shouldReset() : "Unexpected legacy state";

       // Update the last seen event ID
       fromEventIdx += events.length;

       // Process the TaskCompletionEvents:
       // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
       // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
       //    fetching from those maps.
       // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
       //    outputs at all.
       for (TaskCompletionEvent event : events) {
         scheduler.resolve(event);
         if (TaskCompletionEvent.Status.SUCCEEDED == event.getTaskStatus()) {
           ++numNewMaps;
         }
       }
     } while (events.length == maxEventsToFetch);

     return numNewMaps;
   }

 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.hadoop.mapreduce.task.reduce;

	import java.io.IOException;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
	import org.apache.hadoop.mapred.TaskCompletionEvent;
	import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
	import org.apache.hadoop.mapreduce.TaskAttemptID;

	/**
	 * Daemon thread that repeatedly asks the task umbilical for new
	 * map-completion events and passes every event to the
	 * {@link ShuffleScheduler}.
	 *
	 * <p>The thread runs until {@link #shutDown()} is called, it is interrupted,
	 * or {@link #MAX_RETRIES} consecutive polls fail with an
	 * {@link IOException}; in the last case the failure is handed to the
	 * {@link ExceptionReporter}.
	 *
	 * @param <K> key type of the {@link ShuffleScheduler} this fetcher feeds
	 * @param <V> value type of the {@link ShuffleScheduler} this fetcher feeds
	 */
	class EventFetcher<K,V> extends Thread {
	  /** Pause between two successful polls, in milliseconds. */
	  private static final long SLEEP_TIME = 1000;
	  /** Number of consecutive failed polls after which the thread gives up. */
	  private static final int MAX_RETRIES = 10;
	  /** Pause after a failed poll before trying again, in milliseconds. */
	  private static final int RETRY_PERIOD = 5000;
	  private static final Log LOG = LogFactory.getLog(EventFetcher.class);

	  private final TaskAttemptID reduce;
	  private final TaskUmbilicalProtocol umbilical;
	  private final ShuffleScheduler<K,V> scheduler;
	  /** Index of the next event to request; advanced by every batch received. */
	  private int fromEventIdx = 0;
	  /** Maximum number of events requested per umbilical call. */
	  private final int maxEventsToFetch;
	  private final ExceptionReporter exceptionReporter;

	  /** Set by {@link #shutDown()}; volatile because it is read by this thread. */
	  private volatile boolean stopped = false;

	  /**
	   * Creates the fetcher as a named daemon thread. The thread is not started.
	   *
	   * @param reduce attempt on whose behalf events are requested
	   * @param umbilical protocol handle used to request the events
	   * @param scheduler receiver of every fetched event
	   * @param reporter receiver of any fatal error raised in {@link #run()}
	   * @param maxEventsToFetch maximum number of events requested per call
	   */
	  public EventFetcher(TaskAttemptID reduce,
	                      TaskUmbilicalProtocol umbilical,
	                      ShuffleScheduler<K,V> scheduler,
	                      ExceptionReporter reporter,
	                      int maxEventsToFetch) {
	    setName("EventFetcher for fetching Map Completion Events");
	    setDaemon(true);
	    this.reduce = reduce;
	    this.umbilical = umbilical;
	    this.scheduler = scheduler;
	    exceptionReporter = reporter;
	    this.maxEventsToFetch = maxEventsToFetch;
	  }

	  /**
	   * Polls for map-completion events until stopped or interrupted.
	   *
	   * <p>A successful poll resets the failure counter and is followed by a
	   * {@link #SLEEP_TIME} pause. A poll that fails with an {@link IOException}
	   * is retried after {@link #RETRY_PERIOD}; after {@link #MAX_RETRIES}
	   * consecutive failures the error is passed to the exception reporter.
	   * Failures seen after {@link #shutDown()} was requested are not counted
	   * and not reported.
	   */
	  @Override
	  public void run() {
	    int failures = 0;
	    LOG.info(reduce + " Thread started: " + getName());

	    try {
	      while (!stopped && !Thread.currentThread().isInterrupted()) {
	        try {
	          int numNewMaps = getMapCompletionEvents();
	          failures = 0;
	          if (numNewMaps > 0) {
	            LOG.info(reduce + ": " + "Got " + numNewMaps + " new map-outputs");
	          }
	          if (LOG.isDebugEnabled()) {
	            LOG.debug("GetMapEventsThread about to sleep for " + SLEEP_TIME);
	          }
	          if (!Thread.currentThread().isInterrupted()) {
	            Thread.sleep(SLEEP_TIME);
	          }
	        } catch (InterruptedException e) {
	          LOG.info("EventFetcher is interrupted.. Returning");
	          return;
	        } catch (IOException ie) {
	          if (stopped) {
	            // shutDown() sets the flag and then interrupts this thread, so an
	            // in-flight poll may fail as a side effect of the shutdown
	            // itself. Counting that as a fetch failure could report a
	            // spurious fatal error or keep the thread asleep for
	            // RETRY_PERIOD while shutDown() is waiting in join().
	            LOG.info("EventFetcher is stopped.. Returning");
	            return;
	          }
	          LOG.info("Exception in getting events", ie);
	          // check to see whether to abort
	          if (++failures >= MAX_RETRIES) {
	            throw new IOException("too many failures downloading events", ie);
	          }
	          // sleep for a bit
	          if (!Thread.currentThread().isInterrupted()) {
	            Thread.sleep(RETRY_PERIOD);
	          }
	        }
	      }
	    } catch (InterruptedException e) {
	      // Interrupted during the retry pause: exit without reporting.
	      return;
	    } catch (Throwable t) {
	      exceptionReporter.reportException(t);
	      return;
	    }
	  }

	  /**
	   * Asks the thread to stop: sets the stop flag, interrupts the thread and
	   * waits up to five seconds for it to finish.
	   *
	   * <p>If the calling thread is interrupted while waiting, the interruption
	   * is logged and the method returns.
	   */
	  public void shutDown() {
	    this.stopped = true;
	    interrupt();
	    try {
	      join(5000);
	    } catch(InterruptedException ie) {
	      // NOTE(review): the caller's interrupt status is not restored here;
	      // confirm what callers expect before changing that.
	      LOG.warn("Got interrupted while joining " + getName(), ie);
	    }
	  }

	  /**
	   * Requests map-completion events from the umbilical, starting at the last
	   * seen event index, and passes each one to the scheduler.
	   *
	   * <p>Events are requested in batches of at most {@code maxEventsToFetch};
	   * a full batch triggers another request, so the method returns once a
	   * batch comes back with a different size.
	   *
	   * @return number of fetched events whose status is {@code SUCCEEDED}
	   * @throws IOException if requesting or resolving the events fails
	   * @throws InterruptedException if the thread is interrupted meanwhile
	   */
	  protected int getMapCompletionEvents()
	      throws IOException, InterruptedException {

	    int numNewMaps = 0;
	    TaskCompletionEvent[] events;

	    do {
	      MapTaskCompletionEventsUpdate update =
	          umbilical.getMapCompletionEvents(
	              (org.apache.hadoop.mapred.JobID)reduce.getJobID(),
	              fromEventIdx,
	              maxEventsToFetch,
	              (org.apache.hadoop.mapred.TaskAttemptID)reduce);
	      events = update.getMapTaskCompletionEvents();
	      if (LOG.isDebugEnabled()) {
	        LOG.debug("Got " + events.length + " map completion events from " +
	                 fromEventIdx);
	      }

	      assert !update.shouldReset() : "Unexpected legacy state";

	      // Update the last seen event ID
	      fromEventIdx += events.length;

	      // Process the TaskCompletionEvents:
	      // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
	      // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
	      //    fetching from those maps.
	      // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
	      //    outputs at all.
	      for (TaskCompletionEvent event : events) {
	        scheduler.resolve(event);
	        if (TaskCompletionEvent.Status.SUCCEEDED == event.getTaskStatus()) {
	          ++numNewMaps;
	        }
	      }
	    } while (events.length == maxEventsToFetch);

	    return numNewMaps;
	  }

	}