mapreduce/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/postexdiagnosis/tests/MapSideDiskSpill.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.hadoop.vaidya.postexdiagnosis.tests;

 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.vaidya.statistics.job.JobStatistics;
 import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;
 import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.KeyDataType;
 import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
 import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.ReduceTaskKeys;
 import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;
 import org.apache.hadoop.vaidya.DiagnosticTest;
 import org.w3c.dom.Element;
 import java.util.Hashtable;
 import java.util.List;

 /**
  * Diagnostic test that rates how much data the map tasks wrote to local disk
  * relative to the job's total map output bytes.
  *
  * <p>The raw ratio is divided by a configurable normalization factor
  * (input element {@code NormalizationFactor}, default 3.0) and reported as an
  * impact value in the range [0.0, 1.0].
  */
 public class MapSideDiskSpill extends DiagnosticTest {

   /** Impact computed by the last call to {@link #evaluate(JobStatistics)}. */
   private double _impact;
   /** Job passed to the last call to {@link #evaluate(JobStatistics)}. */
   private JobStatistics _job;
   /** Sum of FILE_BYTES_WRITTEN over all map tasks of the evaluated job. */
   private long _numLocalBytesWrittenByMaps;

   /**
    * Creates the test; all state is populated by {@link #evaluate(JobStatistics)}.
    */
   public MapSideDiskSpill() {
   }

   /**
    * Computes the normalized impact of map-side local disk writes for the job.
    *
    * @param job statistics of the job under diagnosis; also retained so that
    *            {@link #getPrescription()} and {@link #getReferenceDetails()}
    *            can report on it afterwards
    * @return the impact, between 0.0 and 1.0 inclusive
    */
   @Override
   public double evaluate(JobStatistics job) {

     this._job = job;

     /*
      * Read the Normalization Factor
      */
     double normF = getInputElementDoubleValue("NormalizationFactor", 3.0);

     /*
      * Sum MapTaskKeys.FILE_BYTES_WRITTEN over the map task list.
      */
     List<MapTaskStatistics> smTaskList = job.getMapTaskList(MapTaskKeys.FILE_BYTES_WRITTEN, KeyDataType.LONG);
     long numLocalBytesWrittenByMaps = 0;
     for (MapTaskStatistics mapTask : smTaskList) {
       numLocalBytesWrittenByMaps += mapTask.getLongValue(MapTaskKeys.FILE_BYTES_WRITTEN);
     }
     this._numLocalBytesWrittenByMaps = numLocalBytesWrittenByMaps;

     /*
      * Map only job vs. map reduce job
      * For MapReduce job MAP_OUTPUT_BYTES are normally written by maps on local disk, so they are subtracted
      * from the localBytesWrittenByMaps.
      */
     long mapOutputBytes = job.getLongValue(JobKeys.MAP_OUTPUT_BYTES);
     boolean hasReduces = job.getLongValue(JobKeys.TOTAL_REDUCES) > 0;
     double ratio = spillRatio(numLocalBytesWrittenByMaps, mapOutputBytes, hasReduces);

     if (ratio > normF) {
       this._impact = 1.0;
     } else {
       this._impact = ratio / normF;
     }

     return this._impact;

   }

   /**
    * Returns the ratio of local bytes written by maps to map output bytes.
    *
    * <p>The division is done in floating point; dividing the two {@code long}
    * values directly would truncate the ratio to a whole number.
    *
    * @param localBytesWritten total local bytes written by all map tasks
    * @param mapOutputBytes    total map output bytes of the job
    * @param hasReduces        whether the job has at least one reduce task; if
    *                          so, {@code mapOutputBytes} is subtracted from the
    *                          numerator first
    * @return the ratio, never negative; 0.0 when {@code mapOutputBytes} is not
    *         positive, because no meaningful ratio exists in that case
    */
   private static double spillRatio(long localBytesWritten, long mapOutputBytes, boolean hasReduces) {
     if (mapOutputBytes <= 0) {
       // Avoids a divide-by-zero when the job reports no map output bytes.
       return 0.0;
     }
     long excessBytes = hasReduces ? localBytesWritten - mapOutputBytes : localBytesWritten;
     // Fewer local bytes written than map output bytes is not a spill problem.
     return Math.max(0.0, (double) excessBytes / mapOutputBytes);
   }

   /**
    * Builds the advice text for reducing map-side disk spill.
    *
    * <p>Must be called after {@link #evaluate(JobStatistics)}, which supplies
    * the job whose configuration values are quoted in the advice.
    *
    * @return newline-separated list of suggestions
    */
   @Override
   public String getPrescription() {
     // The spill percent is a fraction (the advice text itself cites 0.80), so it
     // is read as a float; NOTE(review): assumes getJobConf() returns a Hadoop
     // Configuration/JobConf exposing getFloat - confirm.
     return
     "* Use combiner to lower the map output size.\n" +
       "* Increase map side sort buffer size (" + MRJobConfig.IO_SORT_MB +
       ":" + this._job.getJobConf().getInt(MRJobConfig.IO_SORT_MB, 0) +
       ") if number of Map Output Records are large. \n" +
       "* Increase (" + MRJobConfig.MAP_SORT_SPILL_PERCENT + ":" +
       this._job.getJobConf().getFloat(MRJobConfig.MAP_SORT_SPILL_PERCENT, 0.8f) +
       "), default 0.80 i.e. 80% of sort buffer size. \n";
   }

   /**
    * Reports the figures behind the last evaluation.
    *
    * <p>Must be called after {@link #evaluate(JobStatistics)}.
    *
    * @return total map output bytes, total local bytes written by maps, and the
    *         truncated impact, one per line
    */
   @Override
   public String getReferenceDetails() {
     String ref =
     "* TotalMapOutputBytes: "+this._job.getLongValue(JobKeys.MAP_OUTPUT_BYTES)+"\n"+
     "* Total Local Bytes Written by Maps: "+this._numLocalBytesWrittenByMaps+"\n"+
     "* Impact: "+ truncate(this._impact);
     return ref;
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.hadoop.vaidya.postexdiagnosis.tests;

	import org.apache.hadoop.mapreduce.MRJobConfig;
	import org.apache.hadoop.vaidya.statistics.job.JobStatistics;
	import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.JobKeys;
	import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.KeyDataType;
	import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.MapTaskKeys;
	import org.apache.hadoop.vaidya.statistics.job.JobStatisticsInterface.ReduceTaskKeys;
	import org.apache.hadoop.vaidya.statistics.job.MapTaskStatistics;
	import org.apache.hadoop.vaidya.DiagnosticTest;
	import org.w3c.dom.Element;
	import java.util.Hashtable;
	import java.util.List;

	/**
	 * Diagnostic test that rates how much data the map tasks wrote to local disk
	 * relative to the job's total map output bytes.
	 *
	 * <p>The raw ratio is divided by a configurable normalization factor
	 * (input element {@code NormalizationFactor}, default 3.0) and reported as an
	 * impact value in the range [0.0, 1.0].
	 */
	public class MapSideDiskSpill extends DiagnosticTest {

	  /** Impact computed by the last call to {@link #evaluate(JobStatistics)}. */
	  private double _impact;
	  /** Job passed to the last call to {@link #evaluate(JobStatistics)}. */
	  private JobStatistics _job;
	  /** Sum of FILE_BYTES_WRITTEN over all map tasks of the evaluated job. */
	  private long _numLocalBytesWrittenByMaps;

	  /**
	   * Creates the test; all state is populated by {@link #evaluate(JobStatistics)}.
	   */
	  public MapSideDiskSpill() {
	  }

	  /**
	   * Computes the normalized impact of map-side local disk writes for the job.
	   *
	   * @param job statistics of the job under diagnosis; also retained so that
	   *            {@link #getPrescription()} and {@link #getReferenceDetails()}
	   *            can report on it afterwards
	   * @return the impact, between 0.0 and 1.0 inclusive
	   */
	  @Override
	  public double evaluate(JobStatistics job) {

	    this._job = job;

	    /*
	     * Read the Normalization Factor
	     */
	    double normF = getInputElementDoubleValue("NormalizationFactor", 3.0);

	    /*
	     * Sum MapTaskKeys.FILE_BYTES_WRITTEN over the map task list.
	     */
	    List<MapTaskStatistics> smTaskList = job.getMapTaskList(MapTaskKeys.FILE_BYTES_WRITTEN, KeyDataType.LONG);
	    long numLocalBytesWrittenByMaps = 0;
	    for (MapTaskStatistics mapTask : smTaskList) {
	      numLocalBytesWrittenByMaps += mapTask.getLongValue(MapTaskKeys.FILE_BYTES_WRITTEN);
	    }
	    this._numLocalBytesWrittenByMaps = numLocalBytesWrittenByMaps;

	    /*
	     * Map only job vs. map reduce job
	     * For MapReduce job MAP_OUTPUT_BYTES are normally written by maps on local disk, so they are subtracted
	     * from the localBytesWrittenByMaps.
	     */
	    long mapOutputBytes = job.getLongValue(JobKeys.MAP_OUTPUT_BYTES);
	    boolean hasReduces = job.getLongValue(JobKeys.TOTAL_REDUCES) > 0;
	    double ratio = spillRatio(numLocalBytesWrittenByMaps, mapOutputBytes, hasReduces);

	    if (ratio > normF) {
	      this._impact = 1.0;
	    } else {
	      this._impact = ratio / normF;
	    }

	    return this._impact;

	  }

	  /**
	   * Returns the ratio of local bytes written by maps to map output bytes.
	   *
	   * <p>The division is done in floating point; dividing the two {@code long}
	   * values directly would truncate the ratio to a whole number.
	   *
	   * @param localBytesWritten total local bytes written by all map tasks
	   * @param mapOutputBytes    total map output bytes of the job
	   * @param hasReduces        whether the job has at least one reduce task; if
	   *                          so, {@code mapOutputBytes} is subtracted from the
	   *                          numerator first
	   * @return the ratio, never negative; 0.0 when {@code mapOutputBytes} is not
	   *         positive, because no meaningful ratio exists in that case
	   */
	  private static double spillRatio(long localBytesWritten, long mapOutputBytes, boolean hasReduces) {
	    if (mapOutputBytes <= 0) {
	      // Avoids a divide-by-zero when the job reports no map output bytes.
	      return 0.0;
	    }
	    long excessBytes = hasReduces ? localBytesWritten - mapOutputBytes : localBytesWritten;
	    // Fewer local bytes written than map output bytes is not a spill problem.
	    return Math.max(0.0, (double) excessBytes / mapOutputBytes);
	  }

	  /**
	   * Builds the advice text for reducing map-side disk spill.
	   *
	   * <p>Must be called after {@link #evaluate(JobStatistics)}, which supplies
	   * the job whose configuration values are quoted in the advice.
	   *
	   * @return newline-separated list of suggestions
	   */
	  @Override
	  public String getPrescription() {
	    // The spill percent is a fraction (the advice text itself cites 0.80), so it
	    // is read as a float; NOTE(review): assumes getJobConf() returns a Hadoop
	    // Configuration/JobConf exposing getFloat - confirm.
	    return
	    "* Use combiner to lower the map output size.\n" +
	      "* Increase map side sort buffer size (" + MRJobConfig.IO_SORT_MB +
	      ":" + this._job.getJobConf().getInt(MRJobConfig.IO_SORT_MB, 0) +
	      ") if number of Map Output Records are large. \n" +
	      "* Increase (" + MRJobConfig.MAP_SORT_SPILL_PERCENT + ":" +
	      this._job.getJobConf().getFloat(MRJobConfig.MAP_SORT_SPILL_PERCENT, 0.8f) +
	      "), default 0.80 i.e. 80% of sort buffer size. \n";
	  }

	  /**
	   * Reports the figures behind the last evaluation.
	   *
	   * <p>Must be called after {@link #evaluate(JobStatistics)}.
	   *
	   * @return total map output bytes, total local bytes written by maps, and the
	   *         truncated impact, one per line
	   */
	  @Override
	  public String getReferenceDetails() {
	    String ref =
	    "* TotalMapOutputBytes: "+this._job.getLongValue(JobKeys.MAP_OUTPUT_BYTES)+"\n"+
	    "* Total Local Bytes Written by Maps: "+this._numLocalBytesWrittenByMaps+"\n"+
	    "* Impact: "+ truncate(this._impact);
	    return ref;
	  }
	}