/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.api.DistributedSchedulerProtocol;
import org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.DistSchedAllocateResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.DistSchedRegisterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.proto.ApplicationMasterProtocol.ApplicationMasterProtocolService;
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
import org.apache.hadoop.yarn.server.resourcemanager.ClusterMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.NodeSelector;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
* The DistributedSchedulingService is started instead of the
* ApplicationMasterService if DistributedScheduling is enabled for the YARN
* cluster.
* It extends the functionality of the ApplicationMasterService by servicing
* clients (AMs and AMRMProxy request interceptors) that understand the
* DistributedSchedulingProtocol.
*/
public class DistributedSchedulingService extends ApplicationMasterService
implements DistributedSchedulerProtocol, EventHandler<SchedulerEvent> {
private static final Log LOG =
LogFactory.getLog(DistributedSchedulingService.class);
private final ClusterMonitor clusterMonitor;
private final NodeSelector nodeSelector;
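
  // Rack name -> NodeIds of the NMs on that rack; used to expand rack-local
  // resource requests into concrete node selection hints.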
private final ConcurrentHashMap<String, Set<NodeId>> rackToNode =
new ConcurrentHashMap<>();
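
  // Host name -> NodeIds of the NMs on that host (a single host may run
  // multiple NMs).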
private final ConcurrentHashMap<String, Set<NodeId>> hostToNode =
new ConcurrentHashMap<>();
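
  /**
   * Creates the service and wires up a {@link TopKNodeSelector}, configured
   * from the distributed scheduling settings in {@link YarnConfiguration},
   * to act as both the {@link ClusterMonitor} and the {@link NodeSelector}.
   */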
public DistributedSchedulingService(RMContext rmContext,
YarnScheduler scheduler) {
super(DistributedSchedulingService.class.getName(), rmContext, scheduler);
int k = rmContext.getYarnConfiguration().getInt(
YarnConfiguration.DIST_SCHEDULING_TOP_K,
YarnConfiguration.DIST_SCHEDULING_TOP_K_DEFAULT);
long topKComputationInterval = rmContext.getYarnConfiguration().getLong(
YarnConfiguration.DIST_SCHEDULING_TOP_K_COMPUTE_INT_MS,
YarnConfiguration.DIST_SCHEDULING_TOP_K_COMPUTE_INT_MS_DEFAULT);
TopKNodeSelector.TopKComparator comparator =
TopKNodeSelector.TopKComparator.valueOf(
rmContext.getYarnConfiguration().get(
YarnConfiguration.DIST_SCHEDULING_TOP_K_COMPARATOR,
YarnConfiguration.DIST_SCHEDULING_TOP_K_COMPARATOR_DEFAULT));
TopKNodeSelector topKSelector =
new TopKNodeSelector(k, topKComputationInterval, comparator);
this.clusterMonitor = topKSelector;
this.nodeSelector = topKSelector;
}
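
  /**
   * Creates the RPC server for this service. The primary protocol is the
   * {@link DistributedSchedulerProtocol}; the plain ApplicationMasterProtocol
   * is registered on the same server below, for clients that are not
   * distributed-scheduling aware.
   */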
@Override
public Server getServer(YarnRPC rpc, Configuration serverConf,
InetSocketAddress addr, AMRMTokenSecretManager secretManager) {
Server server = rpc.getServer(DistributedSchedulerProtocol.class, this,
addr, serverConf, secretManager,
serverConf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT,
YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT));
    // To support applications running on NMs that do NOT support distributed
    // scheduling, the server multiplexes both the ApplicationMasterProtocol
    // and the DistributedSchedulingProtocol.
((RPC.Server) server).addProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER,
ApplicationMasterProtocolPB.class,
ApplicationMasterProtocolService.newReflectiveBlockingService(
new ApplicationMasterProtocolPBServiceImpl(this)));
return server;
}
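
  // The following three overrides simply delegate to the parent
  // ApplicationMasterService, so plain ApplicationMasterProtocol clients
  // are served exactly as before.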
  @Override
  public RegisterApplicationMasterResponse registerApplicationMaster(
      RegisterApplicationMasterRequest request)
      throws YarnException, IOException {
    return super.registerApplicationMaster(request);
  }

  @Override
  public FinishApplicationMasterResponse finishApplicationMaster(
      FinishApplicationMasterRequest request)
      throws YarnException, IOException {
    return super.finishApplicationMaster(request);
  }

  @Override
  public AllocateResponse allocate(AllocateRequest request)
      throws YarnException, IOException {
    return super.allocate(request);
  }
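
  /**
   * Registers the AM via the parent service and augments the response with
   * the distributed scheduling parameters: min / max / incremental
   * allocatable capabilities, the container token expiry interval, the start
   * of the container id range for this RM epoch, and an initial list of
   * nodes to schedule on.
   */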
@Override
public DistSchedRegisterResponse
registerApplicationMasterForDistributedScheduling(
RegisterApplicationMasterRequest request) throws YarnException,
IOException {
RegisterApplicationMasterResponse response =
registerApplicationMaster(request);
DistSchedRegisterResponse dsResp = recordFactory
.newRecordInstance(DistSchedRegisterResponse.class);
dsResp.setRegisterResponse(response);
dsResp.setMinAllocatableCapabilty(
Resource.newInstance(
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_MIN_MEMORY,
YarnConfiguration.DIST_SCHEDULING_MIN_MEMORY_DEFAULT),
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_MIN_VCORES,
YarnConfiguration.DIST_SCHEDULING_MIN_VCORES_DEFAULT)
)
);
dsResp.setMaxAllocatableCapabilty(
Resource.newInstance(
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_MAX_MEMORY,
YarnConfiguration.DIST_SCHEDULING_MAX_MEMORY_DEFAULT),
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_MAX_VCORES,
YarnConfiguration.DIST_SCHEDULING_MAX_VCORES_DEFAULT)
)
);
dsResp.setIncrAllocatableCapabilty(
Resource.newInstance(
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_INCR_MEMORY,
YarnConfiguration.DIST_SCHEDULING_INCR_MEMORY_DEFAULT),
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_INCR_VCORES,
YarnConfiguration.DIST_SCHEDULING_INCR_VCORES_DEFAULT)
)
);
dsResp.setContainerTokenExpiryInterval(
getConfig().getInt(
YarnConfiguration.DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS,
YarnConfiguration.
DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS_DEFAULT));
dsResp.setContainerIdStart(
this.rmContext.getEpoch() << ResourceManager.EPOCH_BIT_SHIFT);
// Set nodes to be used for scheduling
dsResp.setNodesForScheduling(
new ArrayList<>(this.nodeSelector.selectNodes()));
return dsResp;
}
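
  /**
   * Performs a regular allocate and decorates the response with a refreshed
   * list of nodes for scheduling, selected using the locality hints derived
   * from the request's ask list.
   */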
  @Override
  public DistSchedAllocateResponse allocateForDistributedScheduling(
      AllocateRequest request) throws YarnException, IOException {
    AllocateResponse response = allocate(request);
    DistSchedAllocateResponse dsResp =
        recordFactory.newRecordInstance(DistSchedAllocateResponse.class);
dsResp.setAllocateResponse(response);
dsResp.setNodesForScheduling(
new ArrayList<>(
this.nodeSelector.selectNodes(createSelectionHints(request))));
return dsResp;
}

  /**
   * Creates the selection criteria. Essentially, it checks for requests that
   * have relax locality set to false and are rack-local or node-local. If
   * so, it adds all nodes on that rack / host (multiple NMs may be running
   * on a single host) to the selection criteria. It also sets the minimum
   * number of required nodes (1 in the case of node-local requests) equal to
   * the number of requested containers, so that the local RM can spread the
   * containers across the returned nodes.
   *
   * @param request the allocate request whose ask list is inspected
   * @return a collection of SelectionHints
   */
private Collection<NodeSelector.SelectionHint> createSelectionHints(
AllocateRequest request) {
List<NodeSelector.SelectionHint> retList = new ArrayList<>();
// TODO: Add support for node labels (support obtaining a list of nodes
// given a label expression)
for (ResourceRequest rr : request.getAskList()) {
if (!rr.getRelaxLocality()
&& rackToNode.containsKey(rr.getResourceName())) {
retList.add(new NodeSelector.SelectionHint(
rackToNode.get(rr.getResourceName()), rr.getNumContainers()));
}
if (!rr.getRelaxLocality()
&& hostToNode.containsKey(rr.getResourceName())) {
retList.add(new NodeSelector.SelectionHint(
hostToNode.get(rr.getResourceName()), 1));
}
}
return retList;
}
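
  /**
   * Adds a node to a rack / host mapping. putIfAbsent guarantees a single
   * set per key across concurrent callers; since HashSet itself is not
   * thread-safe, mutations are guarded by synchronizing on the set.
   */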
private void addToMapping(ConcurrentHashMap<String, Set<NodeId>> mapping,
String rackName, NodeId nodeId) {
if (rackName != null) {
mapping.putIfAbsent(rackName, new HashSet<NodeId>());
Set<NodeId> nodeIds = mapping.get(rackName);
synchronized (nodeIds) {
nodeIds.add(nodeId);
}
}
}

  private void removeFromMapping(
      ConcurrentHashMap<String, Set<NodeId>> mapping,
      String rackName, NodeId nodeId) {
    if (rackName != null) {
      Set<NodeId> nodeIds = mapping.get(rackName);
      // The mapping may contain no entry for this key; guard against an NPE
      if (nodeIds != null) {
        synchronized (nodeIds) {
          nodeIds.remove(nodeId);
        }
      }
    }
  }
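
  /**
   * Keeps the cluster monitor and the rack / host mappings in sync with the
   * scheduler's node events; application and container events are ignored.
   */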
@Override
public void handle(SchedulerEvent event) {
switch (event.getType()) {
case NODE_ADDED:
if (!(event instanceof NodeAddedSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
clusterMonitor.addNode(nodeAddedEvent.getContainerReports(),
nodeAddedEvent.getAddedRMNode());
addToMapping(rackToNode, nodeAddedEvent.getAddedRMNode().getRackName(),
nodeAddedEvent.getAddedRMNode().getNodeID());
addToMapping(hostToNode, nodeAddedEvent.getAddedRMNode().getHostName(),
nodeAddedEvent.getAddedRMNode().getNodeID());
break;
case NODE_REMOVED:
if (!(event instanceof NodeRemovedSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
NodeRemovedSchedulerEvent nodeRemovedEvent =
(NodeRemovedSchedulerEvent)event;
clusterMonitor.removeNode(nodeRemovedEvent.getRemovedRMNode());
removeFromMapping(rackToNode,
nodeRemovedEvent.getRemovedRMNode().getRackName(),
nodeRemovedEvent.getRemovedRMNode().getNodeID());
removeFromMapping(hostToNode,
nodeRemovedEvent.getRemovedRMNode().getHostName(),
nodeRemovedEvent.getRemovedRMNode().getNodeID());
break;
case NODE_UPDATE:
if (!(event instanceof NodeUpdateSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
      NodeUpdateSchedulerEvent nodeUpdatedEvent =
          (NodeUpdateSchedulerEvent) event;
clusterMonitor.nodeUpdate(nodeUpdatedEvent.getRMNode());
break;
case NODE_RESOURCE_UPDATE:
if (!(event instanceof NodeResourceUpdateSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent =
(NodeResourceUpdateSchedulerEvent)event;
clusterMonitor.updateNodeResource(nodeResourceUpdatedEvent.getRMNode(),
nodeResourceUpdatedEvent.getResourceOption());
break;
    // <-- IGNORED EVENTS : START -->
    case APP_ADDED:
    case APP_REMOVED:
    case APP_ATTEMPT_ADDED:
    case APP_ATTEMPT_REMOVED:
    case CONTAINER_EXPIRED:
    case NODE_LABELS_UPDATE:
      break;
    // <-- IGNORED EVENTS : END -->
default:
      LOG.error("Unknown event arrived at DistributedSchedulingService: "
          + event);
}
}
}