/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.policy.PriorityUtilizationQueueOrderingPolicy;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.PriorityQueue;
/**
 * Calculates how many resources need to be preempted for each queue;
 * used by {@link PreemptionCandidatesSelector}.
 */
public class AbstractPreemptableResourceCalculator {
protected final CapacitySchedulerPreemptionContext context;
protected final ResourceCalculator rc;
private boolean isReservedPreemptionCandidatesSelector;
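  /**
   * Orders TempQueuePerPartition objects from most under-guaranteed to most
   * over-guaranteed, comparing each queue's idealAssigned as a fraction of
   * its guaranteed capacity together with the queues' relative priorities
   * (see PriorityUtilizationQueueOrderingPolicy#compare).
   */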
static class TQComparator implements Comparator<TempQueuePerPartition> {
private ResourceCalculator rc;
private Resource clusterRes;
TQComparator(ResourceCalculator rc, Resource clusterRes) {
this.rc = rc;
this.clusterRes = clusterRes;
}
@Override
public int compare(TempQueuePerPartition tq1, TempQueuePerPartition tq2) {
double assigned1 = getIdealPctOfGuaranteed(tq1);
double assigned2 = getIdealPctOfGuaranteed(tq2);
return PriorityUtilizationQueueOrderingPolicy.compare(assigned1,
assigned2, tq1.relativePriority, tq2.relativePriority);
}
// Calculates idealAssigned / guaranteed
// TempQueues with 0 guarantees are always considered the most over
// capacity and therefore considered last for resources.
private double getIdealPctOfGuaranteed(TempQueuePerPartition q) {
double pctOver = Integer.MAX_VALUE;
if (q != null && Resources.greaterThan(rc, clusterRes, q.getGuaranteed(),
Resources.none())) {
pctOver = Resources.divide(rc, clusterRes, q.idealAssigned,
q.getGuaranteed());
}
return (pctOver);
}
}
  /**
   * AbstractPreemptableResourceCalculator constructor.
   *
   * @param preemptionContext
   *          preemption context
   * @param isReservedPreemptionCandidatesSelector
   *          set by the different candidate selector implementations; refer
   *          to TempQueuePerPartition#offer for details.
   */
public AbstractPreemptableResourceCalculator(
CapacitySchedulerPreemptionContext preemptionContext,
boolean isReservedPreemptionCandidatesSelector) {
context = preemptionContext;
rc = preemptionContext.getResourceCalculator();
this.isReservedPreemptionCandidatesSelector =
isReservedPreemptionCandidatesSelector;
}
  /**
   * Given a set of queues, compute the fix-point distribution of unassigned
   * resources among them. As the pending requests of a queue are exhausted,
   * the queue is removed from the set and the remaining capacity is
   * redistributed among the remaining queues. The distribution is weighted
   * based on guaranteed capacity, unless asked to ignoreGuarantee, in which
   * case resources are distributed uniformly.
   *
   * @param totGuarant
   *          total guaranteed resource
   * @param qAlloc
   *          List of child queues
   * @param unassigned
   *          total unassigned resources to distribute
   * @param ignoreGuarantee
   *          ignore guarantee per queue.
   */
protected void computeFixpointAllocation(Resource totGuarant,
Collection<TempQueuePerPartition> qAlloc, Resource unassigned,
boolean ignoreGuarantee) {
// Prior to assigning the unused resources, process each queue as follows:
// If current > guaranteed, idealAssigned = guaranteed + untouchable extra
// Else idealAssigned = current;
// Subtract idealAssigned resources from unassigned.
// If the queue has all of its needs met (that is, if
// idealAssigned >= current + pending), remove it from consideration.
// Sort queues from most under-guaranteed to most over-guaranteed.
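    // Illustrative example (hypothetical numbers, not taken from the code):
    // with totGuarant = <100GB, 100 vcores> shared by qA (guaranteed 60%,
    // using 20%, pending 50%) and qB (guaranteed 40%, using 70%), qA starts
    // at idealAssigned = its current usage (20%) while qB starts at its
    // guarantee plus untouchableExtra; the unassigned remainder is then
    // offered to qA first, since it is the most under-guaranteed queue.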
TQComparator tqComparator = new TQComparator(rc, totGuarant);
PriorityQueue<TempQueuePerPartition> orderedByNeed = new PriorityQueue<>(10,
tqComparator);
for (Iterator<TempQueuePerPartition> i = qAlloc.iterator(); i.hasNext();) {
TempQueuePerPartition q = i.next();
Resource used = q.getUsed();
if (Resources.greaterThan(rc, totGuarant, used, q.getGuaranteed())) {
q.idealAssigned = Resources.add(q.getGuaranteed(), q.untouchableExtra);
} else {
q.idealAssigned = Resources.clone(used);
}
Resources.subtractFrom(unassigned, q.idealAssigned);
      // If idealAssigned < (used + pending), q needs more resources, so add
      // it to the list of underserved queues, ordered by need.
Resource curPlusPend = Resources.add(q.getUsed(), q.pending);
if (Resources.lessThan(rc, totGuarant, q.idealAssigned, curPlusPend)) {
orderedByNeed.add(q);
}
}
// assign all cluster resources until no more demand, or no resources are
// left
while (!orderedByNeed.isEmpty() && Resources.greaterThan(rc, totGuarant,
unassigned, Resources.none())) {
Resource wQassigned = Resource.newInstance(0, 0);
      // We compute the normalizedGuarantee capacity based on the currently
      // active queues.
resetCapacity(unassigned, orderedByNeed, ignoreGuarantee);
// For each underserved queue (or set of queues if multiple are equally
// underserved), offer its share of the unassigned resources based on its
// normalized guarantee. After the offer, if the queue is not satisfied,
// place it back in the ordered list of queues, recalculating its place
// in the order of most under-guaranteed to most over-guaranteed. In this
// way, the most underserved queue(s) are always given resources first.
Collection<TempQueuePerPartition> underserved = getMostUnderservedQueues(
orderedByNeed, tqComparator);
for (Iterator<TempQueuePerPartition> i = underserved.iterator(); i
.hasNext();) {
TempQueuePerPartition sub = i.next();
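        // wQavail: this queue's share of the unassigned resources,
        // proportional to its normalized guarantee and rounded up to whole
        // memory/vcore units. wQidle is the part of that offer the queue
        // could not use; wQdone is what it actually accepted.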
Resource wQavail = Resources.multiplyAndNormalizeUp(rc, unassigned,
sub.normalizedGuarantee, Resource.newInstance(1, 1));
Resource wQidle = sub.offer(wQavail, rc, totGuarant,
isReservedPreemptionCandidatesSelector);
Resource wQdone = Resources.subtract(wQavail, wQidle);
if (Resources.greaterThan(rc, totGuarant, wQdone, Resources.none())) {
          // The queue accepted part of the offer and may still need more.
          // Put it back in the priority queue, recalculating its order
          // based on need.
orderedByNeed.add(sub);
}
Resources.addTo(wQassigned, wQdone);
}
Resources.subtractFrom(unassigned, wQassigned);
}
    // Sometimes it is possible that all queues are properly served, so
    // intra-queue preemption will not attempt any preemption. However, there
    // may still be imbalances within a queue, so make sure all queues are
    // added to the under-served list.
while (!orderedByNeed.isEmpty()) {
TempQueuePerPartition q1 = orderedByNeed.remove();
context.addPartitionToUnderServedQueues(q1.queueName, q1.partition);
}
}
  /**
   * Computes the normalizedGuarantee capacity for each of the active queues.
   *
   * @param clusterResource
   *          the total amount of resources in the cluster
   * @param queues
   *          the list of queues to consider
   * @param ignoreGuar
   *          ignore guarantee.
   */
private void resetCapacity(Resource clusterResource,
Collection<TempQueuePerPartition> queues, boolean ignoreGuar) {
Resource activeCap = Resource.newInstance(0, 0);
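    // When guarantees are ignored, every active queue gets an equal share;
    // otherwise each queue is weighted by its guarantee relative to the
    // total guarantee of all active queues.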
if (ignoreGuar) {
for (TempQueuePerPartition q : queues) {
q.normalizedGuarantee = 1.0f / queues.size();
}
} else {
for (TempQueuePerPartition q : queues) {
Resources.addTo(activeCap, q.getGuaranteed());
}
for (TempQueuePerPartition q : queues) {
q.normalizedGuarantee = Resources.divide(rc, clusterResource,
q.getGuaranteed(), activeCap);
}
}
}
  // Take the most underserved TempQueue (the one at the head). Collect and
  // return the list of all queues that have the same idealAssigned
  // percentage of guaranteed.
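  // For example (hypothetical numbers): if the ordered queues have
  // idealAssigned/guaranteed ratios of 0.2, 0.2 and 0.5, the first call
  // returns the two queues at 0.2 and leaves the queue at 0.5 for a later
  // pass.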
private Collection<TempQueuePerPartition> getMostUnderservedQueues(
PriorityQueue<TempQueuePerPartition> orderedByNeed,
TQComparator tqComparator) {
ArrayList<TempQueuePerPartition> underserved = new ArrayList<>();
while (!orderedByNeed.isEmpty()) {
TempQueuePerPartition q1 = orderedByNeed.remove();
underserved.add(q1);
// Add underserved queues in order for later uses
context.addPartitionToUnderServedQueues(q1.queueName, q1.partition);
TempQueuePerPartition q2 = orderedByNeed.peek();
// q1's pct of guaranteed won't be larger than q2's. If it's less, then
// return what has already been collected. Otherwise, q1's pct of
// guaranteed == that of q2, so add q2 to underserved list during the
// next pass.
if (q2 == null || tqComparator.compare(q1, q2) < 0) {
if (null != q2) {
context.addPartitionToUnderServedQueues(q2.queueName, q2.partition);
}
return underserved;
}
}
return underserved;
}
}