src/mapred/org/apache/hadoop/mapred/OutputCommitter.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.mapred;

 import java.io.IOException;

 /**
  * <code>OutputCommitter</code> describes the commit of task output for a
  * Map-Reduce job.
  *
  * <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
  * the job to:</p>
  * <ol>
  *   <li>
  *   Setup the job during initialization. For example, create the temporary
  *   output directory for the job during the initialization of the job.
  *   </li>
  *   <li>
  *   Cleanup the job after the job completion. For example, remove the
  *   temporary output directory after the job completion.
  *   </li>
  *   <li>
  *   Setup the task temporary output.
  *   </li>
  *   <li>
  *   Check whether a task needs a commit. This is to avoid the commit
  *   procedure if a task does not need commit.
  *   </li>
  *   <li>
  *   Commit of the task output.
  *   </li>
  *   <li>
  *   Discard the task commit.
  *   </li>
  * </ol>
  *
  * @see FileOutputCommitter
  * @see JobContext
  * @see TaskAttemptContext
  */
 public abstract class OutputCommitter
     extends org.apache.hadoop.mapreduce.OutputCommitter {

   /**
    * Invoked by the framework to set up the job output during initialization.
    *
    * @param jobContext Context of the job whose output is being written.
    * @throws IOException if temporary output could not be created
    */
   public abstract void setupJob(JobContext jobContext) throws IOException;

   /**
    * Cleans up the job's output after job completion. This base
    * implementation does nothing.
    *
    * @param jobContext Context of the job whose output is being written.
    * @throws IOException declared for overriding implementations; the base
    *                     implementation never throws it
    * @deprecated use {@link #commitJob(JobContext)} or
    *             {@link #abortJob(JobContext, int)} instead
    */
   @Deprecated
   public void cleanupJob(JobContext jobContext) throws IOException {
     // Nothing to clean up by default.
   }

   /**
    * Commits the job's output after successful job completion. Note that this
    * is invoked for jobs with final run state as {@link JobStatus#SUCCEEDED}.
    *
    * <p>This base implementation only delegates to
    * {@link #cleanupJob(JobContext)}.</p>
    *
    * @param jobContext Context of the job whose output is being written.
    * @throws IOException if the delegated cleanup fails
    */
   public void commitJob(JobContext jobContext) throws IOException {
     cleanupJob(jobContext);
   }

   /**
    * Cleans up the job's output after job failure.
    *
    * <p>This base implementation ignores <code>status</code> and only
    * delegates to {@link #cleanupJob(JobContext)}.</p>
    *
    * @param jobContext Context of the job whose output is being written.
    * @param status Final run state of the job, should be
    *               {@link JobStatus#KILLED} or {@link JobStatus#FAILED}
    * @throws IOException if the delegated cleanup fails
    */
   public void abortJob(JobContext jobContext, int status) throws IOException {
     cleanupJob(jobContext);
   }

   /**
    * Sets up output for the task.
    *
    * @param taskContext Context of the task whose output is being written.
    * @throws IOException if the task output could not be set up
    */
   public abstract void setupTask(TaskAttemptContext taskContext)
       throws IOException;

   /**
    * Checks whether the task needs a commit.
    *
    * @param taskContext Context of the task whose output is being written.
    * @return <code>true</code> if the task needs a commit,
    *         <code>false</code> otherwise
    * @throws IOException if the check could not be performed
    */
   public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
       throws IOException;

   /**
    * Promotes the task's temporary output to the final output location.
    *
    * <p>The task's output is moved to the job's output directory.</p>
    *
    * @param taskContext Context of the task whose output is being written.
    * @throws IOException if commit is not successful
    */
   public abstract void commitTask(TaskAttemptContext taskContext)
       throws IOException;

   /**
    * Discards the task output.
    *
    * @param taskContext Context of the task whose output is being discarded.
    * @throws IOException if the task output could not be discarded
    */
   public abstract void abortTask(TaskAttemptContext taskContext)
       throws IOException;

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link JobContext} and forwards to {@link #setupJob(JobContext)}.
    * The parameter types differ between the two APIs, hence the cast.
    */
   @Override
   public final void setupJob(
       org.apache.hadoop.mapreduce.JobContext jobContext) throws IOException {
     final JobContext oldApiContext = (JobContext) jobContext;
     setupJob(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link JobContext} and forwards to
    * {@link #cleanupJob(JobContext)}. The parameter types differ between the
    * two APIs, hence the cast.
    */
   @Override
   @Deprecated
   public final void cleanupJob(
       org.apache.hadoop.mapreduce.JobContext context) throws IOException {
     final JobContext oldApiContext = (JobContext) context;
     cleanupJob(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link JobContext} and forwards to
    * {@link #commitJob(JobContext)}. The parameter types differ between the
    * two APIs, hence the cast.
    */
   @Override
   public final void commitJob(
       org.apache.hadoop.mapreduce.JobContext context) throws IOException {
     final JobContext oldApiContext = (JobContext) context;
     commitJob(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: translates the new-API run state
    * into the old-API integer state via
    * <code>JobStatus.getOldNewJobRunState</code>, rejects anything other than
    * {@link JobStatus#FAILED} or {@link JobStatus#KILLED}, and then forwards
    * to {@link #abortJob(JobContext, int)} with the cast context.
    *
    * @throws IOException if the translated state is neither failed nor
    *                     killed, or if the delegated abort fails
    */
   @Override
   public final void abortJob(
       org.apache.hadoop.mapreduce.JobContext context,
       org.apache.hadoop.mapreduce.JobStatus.State runState)
       throws IOException {
     // Translate first, validate second, cast last: same order as before.
     final int oldApiState = JobStatus.getOldNewJobRunState(runState);
     final boolean abortable =
         oldApiState == JobStatus.FAILED || oldApiState == JobStatus.KILLED;
     if (!abortable) {
       throw new IOException ("Invalid job run state : " + runState.name());
     }
     final JobContext oldApiContext = (JobContext) context;
     abortJob(oldApiContext, oldApiState);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link TaskAttemptContext} and forwards to
    * {@link #setupTask(TaskAttemptContext)}. The parameter types differ
    * between the two APIs, hence the cast.
    */
   @Override
   public final void setupTask(
       org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
       throws IOException {
     final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
     setupTask(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link TaskAttemptContext} and returns the result of
    * {@link #needsTaskCommit(TaskAttemptContext)}. The parameter types differ
    * between the two APIs, hence the cast.
    */
   @Override
   public final boolean needsTaskCommit(
       org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
       throws IOException {
     final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
     return needsTaskCommit(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link TaskAttemptContext} and forwards to
    * {@link #commitTask(TaskAttemptContext)}. The parameter types differ
    * between the two APIs, hence the cast.
    */
   @Override
   public final void commitTask(
       org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
       throws IOException {
     final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
     commitTask(oldApiContext);
   }

   /**
    * Bridge from the new API to the old one: casts the new-API context to the
    * old-API {@link TaskAttemptContext} and forwards to
    * {@link #abortTask(TaskAttemptContext)}. The parameter types differ
    * between the two APIs, hence the cast.
    */
   @Override
   public final void abortTask(
       org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
       throws IOException {
     final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
     abortTask(oldApiContext);
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.mapred;

	import java.io.IOException;

	/**
	* <code>OutputCommitter</code> describes the commit of task output for a
	* Map-Reduce job.
	*
	* <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
	* the job to:</p>
	* <ol>
	* <li>
	* Setup the job during initialization. For example, create the temporary
	* output directory for the job during the initialization of the job.
	* </li>
	* <li>
	* Cleanup the job after the job completion. For example, remove the
	* temporary output directory after the job completion.
	* </li>
	* <li>
	* Setup the task temporary output.
	* </li>
	* <li>
	* Check whether a task needs a commit. This is to avoid the commit
	* procedure if a task does not need commit.
	* </li>
	* <li>
	* Commit of the task output.
	* </li>
	* <li>
	* Discard the task commit.
	* </li>
	* </ol>
	*
	* @see FileOutputCommitter
	* @see JobContext
	* @see TaskAttemptContext
	*/
	public abstract class OutputCommitter
			extends org.apache.hadoop.mapreduce.OutputCommitter {

		/**
		 * Invoked by the framework to set up the job output during initialization.
		 *
		 * @param jobContext Context of the job whose output is being written.
		 * @throws IOException if temporary output could not be created
		 */
		public abstract void setupJob(JobContext jobContext) throws IOException;

		/**
		 * Cleans up the job's output after job completion. This base
		 * implementation does nothing.
		 *
		 * @param jobContext Context of the job whose output is being written.
		 * @throws IOException declared for overriding implementations; the base
		 *                     implementation never throws it
		 * @deprecated use {@link #commitJob(JobContext)} or
		 *             {@link #abortJob(JobContext, int)} instead
		 */
		@Deprecated
		public void cleanupJob(JobContext jobContext) throws IOException {
			// Nothing to clean up by default.
		}

		/**
		 * Commits the job's output after successful job completion. Note that this
		 * is invoked for jobs with final run state as {@link JobStatus#SUCCEEDED}.
		 *
		 * <p>This base implementation only delegates to
		 * {@link #cleanupJob(JobContext)}.</p>
		 *
		 * @param jobContext Context of the job whose output is being written.
		 * @throws IOException if the delegated cleanup fails
		 */
		public void commitJob(JobContext jobContext) throws IOException {
			cleanupJob(jobContext);
		}

		/**
		 * Cleans up the job's output after job failure.
		 *
		 * <p>This base implementation ignores <code>status</code> and only
		 * delegates to {@link #cleanupJob(JobContext)}.</p>
		 *
		 * @param jobContext Context of the job whose output is being written.
		 * @param status Final run state of the job, should be
		 *               {@link JobStatus#KILLED} or {@link JobStatus#FAILED}
		 * @throws IOException if the delegated cleanup fails
		 */
		public void abortJob(JobContext jobContext, int status) throws IOException {
			cleanupJob(jobContext);
		}

		/**
		 * Sets up output for the task.
		 *
		 * @param taskContext Context of the task whose output is being written.
		 * @throws IOException if the task output could not be set up
		 */
		public abstract void setupTask(TaskAttemptContext taskContext)
				throws IOException;

		/**
		 * Checks whether the task needs a commit.
		 *
		 * @param taskContext Context of the task whose output is being written.
		 * @return <code>true</code> if the task needs a commit,
		 *         <code>false</code> otherwise
		 * @throws IOException if the check could not be performed
		 */
		public abstract boolean needsTaskCommit(TaskAttemptContext taskContext)
				throws IOException;

		/**
		 * Promotes the task's temporary output to the final output location.
		 *
		 * <p>The task's output is moved to the job's output directory.</p>
		 *
		 * @param taskContext Context of the task whose output is being written.
		 * @throws IOException if commit is not successful
		 */
		public abstract void commitTask(TaskAttemptContext taskContext)
				throws IOException;

		/**
		 * Discards the task output.
		 *
		 * @param taskContext Context of the task whose output is being discarded.
		 * @throws IOException if the task output could not be discarded
		 */
		public abstract void abortTask(TaskAttemptContext taskContext)
				throws IOException;

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link JobContext} and forwards to {@link #setupJob(JobContext)}.
		 * The parameter types differ between the two APIs, hence the cast.
		 */
		@Override
		public final void setupJob(
				org.apache.hadoop.mapreduce.JobContext jobContext) throws IOException {
			final JobContext oldApiContext = (JobContext) jobContext;
			setupJob(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link JobContext} and forwards to
		 * {@link #cleanupJob(JobContext)}. The parameter types differ between the
		 * two APIs, hence the cast.
		 */
		@Override
		@Deprecated
		public final void cleanupJob(
				org.apache.hadoop.mapreduce.JobContext context) throws IOException {
			final JobContext oldApiContext = (JobContext) context;
			cleanupJob(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link JobContext} and forwards to
		 * {@link #commitJob(JobContext)}. The parameter types differ between the
		 * two APIs, hence the cast.
		 */
		@Override
		public final void commitJob(
				org.apache.hadoop.mapreduce.JobContext context) throws IOException {
			final JobContext oldApiContext = (JobContext) context;
			commitJob(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: translates the new-API run state
		 * into the old-API integer state via
		 * <code>JobStatus.getOldNewJobRunState</code>, rejects anything other than
		 * {@link JobStatus#FAILED} or {@link JobStatus#KILLED}, and then forwards
		 * to {@link #abortJob(JobContext, int)} with the cast context.
		 *
		 * @throws IOException if the translated state is neither failed nor
		 *                     killed, or if the delegated abort fails
		 */
		@Override
		public final void abortJob(
				org.apache.hadoop.mapreduce.JobContext context,
				org.apache.hadoop.mapreduce.JobStatus.State runState)
				throws IOException {
			// Translate first, validate second, cast last: same order as before.
			final int oldApiState = JobStatus.getOldNewJobRunState(runState);
			final boolean abortable =
					oldApiState == JobStatus.FAILED || oldApiState == JobStatus.KILLED;
			if (!abortable) {
				throw new IOException ("Invalid job run state : " + runState.name());
			}
			final JobContext oldApiContext = (JobContext) context;
			abortJob(oldApiContext, oldApiState);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link TaskAttemptContext} and forwards to
		 * {@link #setupTask(TaskAttemptContext)}. The parameter types differ
		 * between the two APIs, hence the cast.
		 */
		@Override
		public final void setupTask(
				org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
				throws IOException {
			final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
			setupTask(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link TaskAttemptContext} and returns the result of
		 * {@link #needsTaskCommit(TaskAttemptContext)}. The parameter types differ
		 * between the two APIs, hence the cast.
		 */
		@Override
		public final boolean needsTaskCommit(
				org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
				throws IOException {
			final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
			return needsTaskCommit(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link TaskAttemptContext} and forwards to
		 * {@link #commitTask(TaskAttemptContext)}. The parameter types differ
		 * between the two APIs, hence the cast.
		 */
		@Override
		public final void commitTask(
				org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
				throws IOException {
			final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
			commitTask(oldApiContext);
		}

		/**
		 * Bridge from the new API to the old one: casts the new-API context to the
		 * old-API {@link TaskAttemptContext} and forwards to
		 * {@link #abortTask(TaskAttemptContext)}. The parameter types differ
		 * between the two APIs, hence the cast.
		 */
		@Override
		public final void abortTask(
				org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
				throws IOException {
			final TaskAttemptContext oldApiContext = (TaskAttemptContext) taskContext;
			abortTask(oldApiContext);
		}
	}