/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.security.PrivilegedExceptionAction;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSError;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.LogManager;

/**
 * The main() for child processes.
 */
class Child {

  public static final Log LOG =
    LogFactory.getLog(Child.class);

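  // The task attempt currently being run and whether it is a cleanup
  // attempt; volatile because the log-syncing threads below read them.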
  static volatile TaskAttemptID taskid = null;
  static volatile boolean isCleanup;

  public static void main(String[] args) throws Throwable {
    LOG.debug("Child starting");

    final JobConf defaultConf = new JobConf();
    // set tcp nodelay
    defaultConf.setBoolean("ipc.client.tcpnodelay", true);

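    // Command-line arguments: the umbilical (TaskTracker) host and port,
    // the first task attempt id, the task log location and the JVM id.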
    String host = args[0];
    int port = Integer.parseInt(args[1]);
    final InetSocketAddress address = new InetSocketAddress(host, port);
    final TaskAttemptID firstTaskid = TaskAttemptID.forName(args[2]);
    final String logLocation = args[3];
    final int SLEEP_LONGER_COUNT = 5;
    int jvmIdInt = Integer.parseInt(args[4]);
    JVMId jvmId = new JVMId(firstTaskid.getJobID(),
        firstTaskid.getTaskType() == TaskType.MAP, jvmIdInt);

    //load token cache storage
    String jobTokenFile =
      System.getenv().get(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
    Credentials credentials =
      TokenCache.loadTokens(jobTokenFile, defaultConf);
    LOG.debug("loading token. # keys =" + credentials.numberOfSecretKeys() +
        "; from file=" + jobTokenFile);
    Token<JobTokenIdentifier> jt = TokenCache.getJobToken(credentials);
    jt.setService(new Text(address.getAddress().getHostAddress() + ":"
        + address.getPort()));
    UserGroupInformation current = UserGroupInformation.getCurrentUser();
    current.addToken(jt);

    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner =
      UserGroupInformation.createRemoteUser(firstTaskid.getJobID().toString());
    taskOwner.addToken(jt);

    // Set the credentials
    defaultConf.setCredentials(credentials);

    final TaskUmbilicalProtocol umbilical =
      taskOwner.doAs(new PrivilegedExceptionAction<TaskUmbilicalProtocol>() {
        @Override
        public TaskUmbilicalProtocol run() throws Exception {
          return (TaskUmbilicalProtocol)RPC.getProxy(TaskUmbilicalProtocol.class,
              TaskUmbilicalProtocol.versionID,
              address,
              defaultConf);
        }
      });

    int numTasksToExecute = -1; //-1 signifies "no limit"
    int numTasksExecuted = 0;
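    // Flush the task logs one last time when the child JVM exits.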
    Runtime.getRuntime().addShutdownHook(new Thread() {
      public void run() {
        try {
          if (taskid != null) {
            TaskLog.syncLogs(logLocation, taskid, isCleanup);
          }
        } catch (Throwable throwable) {
          // Best-effort flush during JVM shutdown; ignore any failure.
        }
      }
    });
    Thread t = new Thread() {
      public void run() {
        //every so often wake up and syncLogs so that we can track
        //logs of the currently running task
        while (true) {
          try {
            Thread.sleep(5000);
            if (taskid != null) {
              TaskLog.syncLogs(logLocation, taskid, isCleanup);
            }
          } catch (InterruptedException ie) {
            // interrupted; just loop around and sync the logs again
          } catch (IOException iee) {
            LOG.error("Error in syncLogs: " + iee);
            System.exit(-1);
          }
        }
      }
    };
    t.setName("Thread for syncLogs");
    t.setDaemon(true);
    t.start();

    String pid = "";
    if (!Shell.WINDOWS) {
      pid = System.getenv().get("JVM_PID");
    }
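    // The JvmContext identifies this child JVM to the TaskTracker when it
    // asks for work over the umbilical protocol.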
    JvmContext context = new JvmContext(jvmId, pid);
    int idleLoopCount = 0;
    Task task = null;

    UserGroupInformation childUGI = null;

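    // Main loop: repeatedly ask the TaskTracker for a task and run it,
    // until told to die or the per-JVM task limit is reached.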
    try {
      while (true) {
        taskid = null;
        JvmTask myTask = umbilical.getTask(context);
        if (myTask.shouldDie()) {
          break;
        } else {
          if (myTask.getTask() == null) {
            taskid = null;
            if (++idleLoopCount >= SLEEP_LONGER_COUNT) {
              //we sleep for a bigger interval when we don't receive
              //tasks for a while
              Thread.sleep(1500);
            } else {
              Thread.sleep(500);
            }
            continue;
          }
        }
        idleLoopCount = 0;
        task = myTask.getTask();
        taskid = task.getTaskID();
        isCleanup = task.isTaskCleanupTask();
        // reset the statistics for the task
        FileSystem.clearStatistics();

        //create the index file so that the log files
        //are viewable immediately
        TaskLog.syncLogs(logLocation, taskid, isCleanup);

        // Create the job-conf and set credentials
        final JobConf job = new JobConf(task.getJobFile());
        job.setCredentials(defaultConf.getCredentials());

        // set the jobTokenFile into task
        task.setJobTokenSecret(JobTokenSecretManager.
            createSecretKey(jt.getPassword()));

        // Set up the child's Configs.LOCAL_DIR. The child is now sandboxed
        // and can only see files under its own attempt directory.
        TaskRunner.setupChildMapredLocalDirs(task, job);

        //setupWorkDir actually sets up the symlinks for the distributed
        //cache. After a task exits we wipe the workdir clean, and hence
        //the symlinks have to be rebuilt.
        TaskRunner.setupWorkDir(job, new File(".").getAbsoluteFile());

        numTasksToExecute = job.getNumTasksToExecutePerJvm();
        assert(numTasksToExecute != 0);

        task.setConf(job);

        // Initialize Java VM metrics
        JvmMetrics.init(task.getPhase().toString(), job.getSessionId());
        LOG.debug("Creating remote user to execute task: " + job.get("user.name"));
        childUGI = UserGroupInformation.createRemoteUser(job.get("user.name"));
        // Add tokens to new user so that it may execute its task correctly.
        for (Token<?> token : UserGroupInformation.getCurrentUser().getTokens()) {
          childUGI.addToken(token);
        }

        // Create a final reference to the task for the doAs block
        final Task taskFinal = task;
        childUGI.doAs(new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            try {
              // use job-specified working directory
              FileSystem.get(job).setWorkingDirectory(job.getWorkingDirectory());
              taskFinal.run(job, umbilical);        // run the task
            } finally {
              TaskLog.syncLogs(logLocation, taskid, isCleanup);
            }

            return null;
          }
        });

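        // Exit once this JVM has run its allotted number of tasks.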
        if (numTasksToExecute > 0 && ++numTasksExecuted == numTasksToExecute) {
          break;
        }
      }
    } catch (FSError e) {
      LOG.fatal("FSError from child", e);
      umbilical.fsError(taskid, e.getMessage());
    } catch (Exception exception) {
      LOG.warn("Exception running child : "
          + StringUtils.stringifyException(exception));
      try {
        if (task != null) {
          // do cleanup for the task
          if (childUGI == null) { // no need to go into a doAs block
            task.taskCleanup(umbilical);
          } else {
            final Task taskFinal = task;
            childUGI.doAs(new PrivilegedExceptionAction<Object>() {
              @Override
              public Object run() throws Exception {
                taskFinal.taskCleanup(umbilical);
                return null;
              }
            });
          }
        }
      } catch (Exception e) {
        LOG.info("Exception cleaning up : " + StringUtils.stringifyException(e));
      }
      // Report back any failures, for diagnostic purposes
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      exception.printStackTrace(new PrintStream(baos));
      if (taskid != null) {
        umbilical.reportDiagnosticInfo(taskid, baos.toString());
      }
    } catch (Throwable throwable) {
      LOG.fatal("Error running child : "
          + StringUtils.stringifyException(throwable));
      if (taskid != null) {
        Throwable tCause = throwable.getCause();
        String cause = tCause == null
            ? throwable.getMessage()
            : StringUtils.stringifyException(tCause);
        umbilical.fatalError(taskid, cause);
      }
    } finally {
      RPC.stopProxy(umbilical);
      MetricsContext metricsContext = MetricsUtil.getContext("mapred");
      metricsContext.close();
      // Shut down log4j in the child VM. This assumes that no further
      // logging is done after Task.run() returns.
      LogManager.shutdown();
    }
  }
}