hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java - hadoop - Git at Google

 /**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package org.apache.hadoop.mapreduce.v2.jobhistory;

 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
 import static java.nio.charset.StandardCharsets.UTF_8;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;

 public class FileNameIndexUtils {

   // Sanitize job history file for predictable parsing
   static final String DELIMITER = "-";
   static final String DELIMITER_ESCAPE = "%2D";

   private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class);

   // Job history file names need to be backwards compatible
   // Only append new elements to the end of this list
   private static final int JOB_ID_INDEX = 0;
   private static final int SUBMIT_TIME_INDEX = 1;
   private static final int USER_INDEX = 2;
   private static final int JOB_NAME_INDEX = 3;
   private static final int FINISH_TIME_INDEX = 4;
   private static final int NUM_MAPS_INDEX = 5;
   private static final int NUM_REDUCES_INDEX = 6;
   private static final int JOB_STATUS_INDEX = 7;
   private static final int QUEUE_NAME_INDEX = 8;
   private static final int JOB_START_TIME_INDEX = 9;

   /**
    * Constructs the job history file name from the JobIndexInfo.
    *
    * @param indexInfo the index info.
    * @return the done job history filename.
    */
   public static String getDoneFileName(JobIndexInfo indexInfo)
       throws IOException {
     return getDoneFileName(indexInfo,
         JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT);
   }

   public static String getDoneFileName(JobIndexInfo indexInfo,
       int jobNameLimit) throws IOException {
     StringBuilder sb = new StringBuilder();
     //JobId
     sb.append(encodeJobHistoryFileName(escapeDelimiters(
         TypeConverter.fromYarn(indexInfo.getJobId()).toString())));
     sb.append(DELIMITER);

     //SubmitTime
     sb.append(encodeJobHistoryFileName(String.valueOf(
         indexInfo.getSubmitTime())));
     sb.append(DELIMITER);

     //UserName
     sb.append(encodeJobHistoryFileName(escapeDelimiters(
         getUserName(indexInfo))));
     sb.append(DELIMITER);

     //JobName
     sb.append(trimURLEncodedString(encodeJobHistoryFileName(escapeDelimiters(
         getJobName(indexInfo))), jobNameLimit));
     sb.append(DELIMITER);

     //FinishTime
     sb.append(encodeJobHistoryFileName(
         String.valueOf(indexInfo.getFinishTime())));
     sb.append(DELIMITER);

     //NumMaps
     sb.append(encodeJobHistoryFileName(
         String.valueOf(indexInfo.getNumMaps())));
     sb.append(DELIMITER);

     //NumReduces
     sb.append(encodeJobHistoryFileName(
         String.valueOf(indexInfo.getNumReduces())));
     sb.append(DELIMITER);

     //JobStatus
     sb.append(encodeJobHistoryFileName(indexInfo.getJobStatus()));
     sb.append(DELIMITER);

     //QueueName
     sb.append(escapeDelimiters(encodeJobHistoryFileName(
         getQueueName(indexInfo))));
     sb.append(DELIMITER);

     //JobStartTime
     sb.append(encodeJobHistoryFileName(
         String.valueOf(indexInfo.getJobStartTime())));

     sb.append(encodeJobHistoryFileName(
         JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
     return sb.toString();
   }

   /**
    * Parses the provided job history file name to construct a
    * JobIndexInfo object which is returned.
    *
    * @param jhFileName the job history filename.
    * @return a JobIndexInfo object built from the filename.
    */
   public static JobIndexInfo getIndexInfo(String jhFileName)
       throws IOException {
     String fileName = jhFileName.substring(0,
         jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
     JobIndexInfo indexInfo = new JobIndexInfo();

     String[] jobDetails = fileName.split(DELIMITER);

     JobID oldJobId =
         JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
     JobId jobId = TypeConverter.toYarn(oldJobId);
     indexInfo.setJobId(jobId);

     // Do not fail if there are some minor parse errors
     try {
       try {
         indexInfo.setSubmitTime(Long.parseLong(
             decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
       } catch (NumberFormatException e) {
         LOG.warn("Unable to parse submit time from job history file "
             + jhFileName + " : " + e);
       }

       indexInfo.setUser(
           decodeJobHistoryFileName(jobDetails[USER_INDEX]));

       indexInfo.setJobName(
           decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));

       try {
         indexInfo.setFinishTime(Long.parseLong(
             decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
       } catch (NumberFormatException e) {
         LOG.warn("Unable to parse finish time from job history file "
             + jhFileName + " : " + e);
       }

       try {
         indexInfo.setNumMaps(Integer.parseInt(
             decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
       } catch (NumberFormatException e) {
         LOG.warn("Unable to parse num maps from job history file "
             + jhFileName + " : " + e);
       }

       try {
         indexInfo.setNumReduces(Integer.parseInt(
             decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
       } catch (NumberFormatException e) {
         LOG.warn("Unable to parse num reduces from job history file "
             + jhFileName + " : " + e);
       }

       indexInfo.setJobStatus(
           decodeJobHistoryFileName(jobDetails[JOB_STATUS_INDEX]));

       indexInfo.setQueueName(
           decodeJobHistoryFileName(jobDetails[QUEUE_NAME_INDEX]));

       try{
         if (jobDetails.length <= JOB_START_TIME_INDEX) {
           indexInfo.setJobStartTime(indexInfo.getSubmitTime());
         } else {
           indexInfo.setJobStartTime(Long.parseLong(
               decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
         }
       } catch (NumberFormatException e){
         LOG.warn("Unable to parse start time from job history file "
             + jhFileName + " : " + e);
       }
     } catch (IndexOutOfBoundsException e) {
       LOG.warn("Parsing job history file with partial data encoded into name: "
           + jhFileName);
     }

     return indexInfo;
   }


   /**
    * Helper function to encode the URL of the filename of the job-history
    * log file.
    *
    * @param logFileName file name of the job-history file
    * @return URL encoded filename
    * @throws IOException
    */
   public static String encodeJobHistoryFileName(String logFileName)
   throws IOException {
     String replacementDelimiterEscape = null;

     // Temporarily protect the escape delimiters from encoding
     if (logFileName.contains(DELIMITER_ESCAPE)) {
       replacementDelimiterEscape = nonOccursString(logFileName);

       logFileName = logFileName.replaceAll(
           DELIMITER_ESCAPE, replacementDelimiterEscape);
     }

     String encodedFileName = null;
     try {
       encodedFileName = URLEncoder.encode(logFileName, "UTF-8");
     } catch (UnsupportedEncodingException uee) {
       IOException ioe = new IOException();
       ioe.initCause(uee);
       ioe.setStackTrace(uee.getStackTrace());
       throw ioe;
     }

     // Restore protected escape delimiters after encoding
     if (replacementDelimiterEscape != null) {
       encodedFileName = encodedFileName.replaceAll(
           replacementDelimiterEscape, DELIMITER_ESCAPE);
     }

     return encodedFileName;
   }

   /**
    * Helper function to decode the URL of the filename of the job-history
    * log file.
    *
    * @param logFileName file name of the job-history file
    * @return URL decoded filename
    * @throws IOException
    */
   public static String decodeJobHistoryFileName(String logFileName)
   throws IOException {
     String decodedFileName = null;
     try {
       decodedFileName = URLDecoder.decode(logFileName, "UTF-8");
     } catch (UnsupportedEncodingException uee) {
       IOException ioe = new IOException();
       ioe.initCause(uee);
       ioe.setStackTrace(uee.getStackTrace());
       throw ioe;
     }
     return decodedFileName;
   }

   static String nonOccursString(String logFileName) {
     int adHocIndex = 0;

     String unfoundString = "q" + adHocIndex;

     while (logFileName.contains(unfoundString)) {
       unfoundString = "q" + ++adHocIndex;
     }

     return unfoundString + "q";
   }

   private static String getUserName(JobIndexInfo indexInfo) {
     return getNonEmptyString(indexInfo.getUser());
   }

   private static String getJobName(JobIndexInfo indexInfo) {
     return getNonEmptyString(indexInfo.getJobName());
   }

   private static String getQueueName(JobIndexInfo indexInfo) {
     return getNonEmptyString(indexInfo.getQueueName());
   }

   //TODO Maybe handle default values for longs and integers here?

   private static String getNonEmptyString(String in) {
     if (in == null || in.length() == 0) {
       in = "NA";
     }
     return in;
   }

   private static String escapeDelimiters(String escapee) {
     return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE);
   }

   /**
    * Trims the url-encoded string if required
    */
   private static String trimURLEncodedString(
       String encodedString, int limitLength) {
     assert(limitLength >= 0) : "limitLength should be positive integer";

     if (encodedString.length() <= limitLength) {
       return encodedString;
     }

     int index = 0;
     int increase = 0;
     byte[] strBytes = encodedString.getBytes(UTF_8);

     // calculate effective character length based on UTF-8 specification.
     // The size of a character coded in UTF-8 should be 4-byte at most.
     // See RFC3629
     while (true) {
       byte b = strBytes[index];
       if (b == '%') {
         byte minuend1 = strBytes[index + 1];
         byte subtrahend1 = (byte)(Character.isDigit(
             minuend1) ? '0' : 'A' - 10);
         byte minuend2 = strBytes[index + 2];
         byte subtrahend2 = (byte)(Character.isDigit(
             minuend2) ? '0' : 'A' - 10);
         int initialHex =
             ((Character.toUpperCase(minuend1) - subtrahend1) << 4) +
             (Character.toUpperCase(minuend2) - subtrahend2);

         if (0x00 <= initialHex && initialHex <= 0x7F) {
           // For 1-byte UTF-8 characters
           increase = 3;
         } else if (0xC2 <= initialHex && initialHex <= 0xDF) {
           // For 2-byte UTF-8 characters
           increase = 6;
         } else if (0xE0 <= initialHex && initialHex <= 0xEF) {
           // For 3-byte UTF-8 characters
           increase = 9;
         } else {
           // For 4-byte UTF-8 characters
           increase = 12;
         }
       } else {
         increase = 1;
       }
       if (index + increase > limitLength) {
         break;
       } else {
         index += increase;
       }
     }

     return encodedString.substring(0, index);
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.mapreduce.v2.jobhistory;

	import java.io.IOException;
	import java.io.UnsupportedEncodingException;
	import java.net.URLDecoder;
	import java.net.URLEncoder;
	import static java.nio.charset.StandardCharsets.UTF_8;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;

	import org.apache.hadoop.mapreduce.JobID;
	import org.apache.hadoop.mapreduce.TypeConverter;
	import org.apache.hadoop.mapreduce.v2.api.records.JobId;

	public class FileNameIndexUtils {

	// Sanitize job history file for predictable parsing
	static final String DELIMITER = "-";
	static final String DELIMITER_ESCAPE = "%2D";

	private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class);

	// Job history file names need to be backwards compatible
	// Only append new elements to the end of this list
	private static final int JOB_ID_INDEX = 0;
	private static final int SUBMIT_TIME_INDEX = 1;
	private static final int USER_INDEX = 2;
	private static final int JOB_NAME_INDEX = 3;
	private static final int FINISH_TIME_INDEX = 4;
	private static final int NUM_MAPS_INDEX = 5;
	private static final int NUM_REDUCES_INDEX = 6;
	private static final int JOB_STATUS_INDEX = 7;
	private static final int QUEUE_NAME_INDEX = 8;
	private static final int JOB_START_TIME_INDEX = 9;

	/**
	* Constructs the job history file name from the JobIndexInfo.
	*
	* @param indexInfo the index info.
	* @return the done job history filename.
	*/
	public static String getDoneFileName(JobIndexInfo indexInfo)
	throws IOException {
	return getDoneFileName(indexInfo,
	JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT);
	}

	public static String getDoneFileName(JobIndexInfo indexInfo,
	int jobNameLimit) throws IOException {
	StringBuilder sb = new StringBuilder();
	//JobId
	sb.append(encodeJobHistoryFileName(escapeDelimiters(
	TypeConverter.fromYarn(indexInfo.getJobId()).toString())));
	sb.append(DELIMITER);

	//SubmitTime
	sb.append(encodeJobHistoryFileName(String.valueOf(
	indexInfo.getSubmitTime())));
	sb.append(DELIMITER);

	//UserName
	sb.append(encodeJobHistoryFileName(escapeDelimiters(
	getUserName(indexInfo))));
	sb.append(DELIMITER);

	//JobName
	sb.append(trimURLEncodedString(encodeJobHistoryFileName(escapeDelimiters(
	getJobName(indexInfo))), jobNameLimit));
	sb.append(DELIMITER);

	//FinishTime
	sb.append(encodeJobHistoryFileName(
	String.valueOf(indexInfo.getFinishTime())));
	sb.append(DELIMITER);

	//NumMaps
	sb.append(encodeJobHistoryFileName(
	String.valueOf(indexInfo.getNumMaps())));
	sb.append(DELIMITER);

	//NumReduces
	sb.append(encodeJobHistoryFileName(
	String.valueOf(indexInfo.getNumReduces())));
	sb.append(DELIMITER);

	//JobStatus
	sb.append(encodeJobHistoryFileName(indexInfo.getJobStatus()));
	sb.append(DELIMITER);

	//QueueName
	sb.append(escapeDelimiters(encodeJobHistoryFileName(
	getQueueName(indexInfo))));
	sb.append(DELIMITER);

	//JobStartTime
	sb.append(encodeJobHistoryFileName(
	String.valueOf(indexInfo.getJobStartTime())));

	sb.append(encodeJobHistoryFileName(
	JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
	return sb.toString();
	}

	/**
	* Parses the provided job history file name to construct a
	* JobIndexInfo object which is returned.
	*
	* @param jhFileName the job history filename.
	* @return a JobIndexInfo object built from the filename.
	*/
	public static JobIndexInfo getIndexInfo(String jhFileName)
	throws IOException {
	String fileName = jhFileName.substring(0,
	jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
	JobIndexInfo indexInfo = new JobIndexInfo();

	String[] jobDetails = fileName.split(DELIMITER);

	JobID oldJobId =
	JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
	JobId jobId = TypeConverter.toYarn(oldJobId);
	indexInfo.setJobId(jobId);

	// Do not fail if there are some minor parse errors
	try {
	try {
	indexInfo.setSubmitTime(Long.parseLong(
	decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
	} catch (NumberFormatException e) {
	LOG.warn("Unable to parse submit time from job history file "
	+ jhFileName + " : " + e);
	}

	indexInfo.setUser(
	decodeJobHistoryFileName(jobDetails[USER_INDEX]));

	indexInfo.setJobName(
	decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));

	try {
	indexInfo.setFinishTime(Long.parseLong(
	decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
	} catch (NumberFormatException e) {
	LOG.warn("Unable to parse finish time from job history file "
	+ jhFileName + " : " + e);
	}

	try {
	indexInfo.setNumMaps(Integer.parseInt(
	decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
	} catch (NumberFormatException e) {
	LOG.warn("Unable to parse num maps from job history file "
	+ jhFileName + " : " + e);
	}

	try {
	indexInfo.setNumReduces(Integer.parseInt(
	decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
	} catch (NumberFormatException e) {
	LOG.warn("Unable to parse num reduces from job history file "
	+ jhFileName + " : " + e);
	}

	indexInfo.setJobStatus(
	decodeJobHistoryFileName(jobDetails[JOB_STATUS_INDEX]));

	indexInfo.setQueueName(
	decodeJobHistoryFileName(jobDetails[QUEUE_NAME_INDEX]));

	try{
	if (jobDetails.length <= JOB_START_TIME_INDEX) {
	indexInfo.setJobStartTime(indexInfo.getSubmitTime());
	} else {
	indexInfo.setJobStartTime(Long.parseLong(
	decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
	}
	} catch (NumberFormatException e){
	LOG.warn("Unable to parse start time from job history file "
	+ jhFileName + " : " + e);
	}
	} catch (IndexOutOfBoundsException e) {
	LOG.warn("Parsing job history file with partial data encoded into name: "
	+ jhFileName);
	}

	return indexInfo;
	}


	/**
	* Helper function to encode the URL of the filename of the job-history
	* log file.
	*
	* @param logFileName file name of the job-history file
	* @return URL encoded filename
	* @throws IOException
	*/
	public static String encodeJobHistoryFileName(String logFileName)
	throws IOException {
	String replacementDelimiterEscape = null;

	// Temporarily protect the escape delimiters from encoding
	if (logFileName.contains(DELIMITER_ESCAPE)) {
	replacementDelimiterEscape = nonOccursString(logFileName);

	logFileName = logFileName.replaceAll(
	DELIMITER_ESCAPE, replacementDelimiterEscape);
	}

	String encodedFileName = null;
	try {
	encodedFileName = URLEncoder.encode(logFileName, "UTF-8");
	} catch (UnsupportedEncodingException uee) {
	IOException ioe = new IOException();
	ioe.initCause(uee);
	ioe.setStackTrace(uee.getStackTrace());
	throw ioe;
	}

	// Restore protected escape delimiters after encoding
	if (replacementDelimiterEscape != null) {
	encodedFileName = encodedFileName.replaceAll(
	replacementDelimiterEscape, DELIMITER_ESCAPE);
	}

	return encodedFileName;
	}

	/**
	* Helper function to decode the URL of the filename of the job-history
	* log file.
	*
	* @param logFileName file name of the job-history file
	* @return URL decoded filename
	* @throws IOException
	*/
	public static String decodeJobHistoryFileName(String logFileName)
	throws IOException {
	String decodedFileName = null;
	try {
	decodedFileName = URLDecoder.decode(logFileName, "UTF-8");
	} catch (UnsupportedEncodingException uee) {
	IOException ioe = new IOException();
	ioe.initCause(uee);
	ioe.setStackTrace(uee.getStackTrace());
	throw ioe;
	}
	return decodedFileName;
	}

	static String nonOccursString(String logFileName) {
	int adHocIndex = 0;

	String unfoundString = "q" + adHocIndex;

	while (logFileName.contains(unfoundString)) {
	unfoundString = "q" + ++adHocIndex;
	}

	return unfoundString + "q";
	}

	private static String getUserName(JobIndexInfo indexInfo) {
	return getNonEmptyString(indexInfo.getUser());
	}

	private static String getJobName(JobIndexInfo indexInfo) {
	return getNonEmptyString(indexInfo.getJobName());
	}

	private static String getQueueName(JobIndexInfo indexInfo) {
	return getNonEmptyString(indexInfo.getQueueName());
	}

	//TODO Maybe handle default values for longs and integers here?

	private static String getNonEmptyString(String in) {
	if (in == null \|\| in.length() == 0) {
	in = "NA";
	}
	return in;
	}

	private static String escapeDelimiters(String escapee) {
	return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE);
	}

	/**
	* Trims the url-encoded string if required
	*/
	private static String trimURLEncodedString(
	String encodedString, int limitLength) {
	assert(limitLength >= 0) : "limitLength should be positive integer";

	if (encodedString.length() <= limitLength) {
	return encodedString;
	}

	int index = 0;
	int increase = 0;
	byte[] strBytes = encodedString.getBytes(UTF_8);

	// calculate effective character length based on UTF-8 specification.
	// The size of a character coded in UTF-8 should be 4-byte at most.
	// See RFC3629
	while (true) {
	byte b = strBytes[index];
	if (b == '%') {
	byte minuend1 = strBytes[index + 1];
	byte subtrahend1 = (byte)(Character.isDigit(
	minuend1) ? '0' : 'A' - 10);
	byte minuend2 = strBytes[index + 2];
	byte subtrahend2 = (byte)(Character.isDigit(
	minuend2) ? '0' : 'A' - 10);
	int initialHex =
	((Character.toUpperCase(minuend1) - subtrahend1) << 4) +
	(Character.toUpperCase(minuend2) - subtrahend2);

	if (0x00 <= initialHex && initialHex <= 0x7F) {
	// For 1-byte UTF-8 characters
	increase = 3;
	} else if (0xC2 <= initialHex && initialHex <= 0xDF) {
	// For 2-byte UTF-8 characters
	increase = 6;
	} else if (0xE0 <= initialHex && initialHex <= 0xEF) {
	// For 3-byte UTF-8 characters
	increase = 9;
	} else {
	// For 4-byte UTF-8 characters
	increase = 12;
	}
	} else {
	increase = 1;
	}
	if (index + increase > limitLength) {
	break;
	} else {
	index += increase;
	}
	}

	return encodedString.substring(0, index);
	}
	}