YARN-8932. ResourceUtilization cpu is misused in oversubscription as a percentage.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPolicy.java
index 1a3ebca..45406d7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPolicy.java
@@ -54,8 +54,8 @@
memoryOverAllocationThresholdBytes - memoryUtilizationBytes;
int vcoreAvailable = Math.round(
- (overAllocationThresholds.getCpuThreshold() - utilization.getCPU()) *
- containersMonitor.getVCoresAllocatedForContainers());
+ containersMonitor.getVCoresAllocatedForContainers() *
+ overAllocationThresholds.getCpuThreshold() - utilization.getCPU());
return (memoryAvailable <= 0 || vcoreAvailable <= 0) ? Resources.none() :
Resource.newInstance(memoryAvailable >> 20, vcoreAvailable);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java
index e4665bb..f8a8a24 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java
@@ -62,7 +62,9 @@
int memoryOverLimit = utilization.getPhysicalMemory() -
absoluteMemoryPreemptionThresholdMb;
- float vcoreOverLimit = utilization.getCPU() - cpuPreemptionThreshold;
+ float vcoreOverLimit = utilization.getCPU() -
+ getContainersMonitor().getVCoresAllocatedForContainers() *
+ cpuPreemptionThreshold;
if (vcoreOverLimit > 0) {
timesCpuOverPreemption++;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerWithOverAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerWithOverAllocation.java
index a2d4aa8..e5d5518b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerWithOverAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerWithOverAllocation.java
@@ -177,9 +177,10 @@
BaseContainerManagerTest.waitForContainerSubState(
containerManager, createContainerId(1), ContainerSubState.RUNNING);
- // the current containers utilization is low
+ // the current containers utilization is low, only 512MBs out of 2GBs and
+ // 0.5 out of 4 vcores are being used.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(512, 0, 1.0f/8));
+ ResourceUtilization.newInstance(512, 0, 0.5f));
// start a container that requests more than what's left unallocated
// 512 + 1024 + 824 > 2048
@@ -251,9 +252,10 @@
BaseContainerManagerTest.waitForContainerSubState(
containerManager, createContainerId(1), ContainerSubState.RUNNING);
- // the containers utilization is high
+ // the containers memory utilization is high, 1500 MBs out of 2GBs,
+ // but the cpu utilization, 0.5 out of 4 vcores, is low
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1500, 0, 1.0f/8));
+ ResourceUtilization.newInstance(1500, 0, 0.5f));
// start a container that requests more than what's left unallocated
// 512 + 1024 + 824 > 2048
@@ -313,9 +315,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.RUNNING);
- // the current containers utilization is low
+ // the current containers utilization is low, 800 MBs out of 2GBs and
+ // 0.5 out of 4 vcores are being used
setContainerResourceUtilization(
- ResourceUtilization.newInstance(800, 0, 1.0f/8));
+ ResourceUtilization.newInstance(800, 0, 0.5f));
// start a container when there is no resources left unallocated.
containerManager.startContainers(StartContainersRequest.newInstance(
@@ -383,9 +386,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.RUNNING);
- // the containers are fully utilizing their resources
+ // the containers are fully utilizing their memory, 2048 MBs out of
+ // 2 GBs, but cpu utilization, 0.5 out of 4 vcores, is low
setContainerResourceUtilization(
- ResourceUtilization.newInstance(2048, 0, 1.0f/8));
+ ResourceUtilization.newInstance(2048, 0, 0.5f));
// start more OPPORTUNISTIC containers than what the OPPORTUNISTIC container
// queue can hold when there is no unallocated resource left.
@@ -451,9 +455,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.RUNNING);
- // containers utilization is above the over-allocation threshold
+ // containers utilization is above the over-allocation threshold
+ // 1600 MBs out of 2GBs and 2 out of 4 vcores are being used.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1600, 0, 1.0f/2));
+ ResourceUtilization.newInstance(1600, 0, 2.0f));
// start a container that can just fit in the remaining unallocated space
containerManager.startContainers(StartContainersRequest.newInstance(
@@ -506,9 +511,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.RUNNING);
- // containers utilization is low
+ // containers utilization is low, only 512 MBs out of 2GBs and 0.5
+ // out of 4 vcores are being used
setContainerResourceUtilization(
- ResourceUtilization.newInstance(512, 0, 1.0f/8));
+ ResourceUtilization.newInstance(512, 0, 0.5f));
// start a GUARANTEED container that requests more than what's left
// unallocated on the node: (512 + 1024 + 824) > 2048
@@ -562,9 +568,11 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.RUNNING);
- // the containers utilization is very high
+ // the containers memory utilization is very high, 1800 MBs out of
+ // 2GBs of memory is being used. But the cpu utilization, 0.5 out of
+ // 4 vcores, is very low.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1800, 0, 1.0f/8));
+ ResourceUtilization.newInstance(1800, 0, 0.5f));
// start a GUARANTEED container that requests more than what's left
// unallocated on the node 512 + 1024 + 824 > 2048
@@ -654,9 +662,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(2), ContainerSubState.RUNNING);
- // the containers utilization is low
+ // the containers utilization is low, 1024 MBs out of 2GBs and 0.5
+ // out of 4 vcores are being used.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1024, 0, 1.0f/8));
+ ResourceUtilization.newInstance(1024, 0, 0.5f));
// start a GUARANTEED container that requests more than what's left
// unallocated on the node: (512 + 1024 + 824) > 2048
@@ -717,9 +726,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(2), ContainerSubState.RUNNING);
- // the container utilization is at the overallocation threshold
+ // the container memory utilization is at the overallocation threshold
+ // 2048 * 0.75 = 1536 MB, the cpu utilization is 2 out of 4 vcores.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1536, 0, 1.0f/2));
+ ResourceUtilization.newInstance(1536, 0, 2.0f));
containerManager.startContainers(StartContainersRequest.newInstance(
new ArrayList<StartContainerRequest>() {
@@ -739,9 +749,11 @@
}
});
- // the GUARANTEED container is completed releasing resources
+ // the GUARANTEED container is completed releasing resources, the
+ // utilization goes down to 100 MB out of 2GBs of memory and 0.8
+ // out of 4 vcores
setContainerResourceUtilization(
- ResourceUtilization.newInstance(100, 0, 1.0f/5));
+ ResourceUtilization.newInstance(100, 0, 0.8f));
allowContainerToSucceed(2);
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(2), ContainerSubState.DONE);
@@ -800,9 +812,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(0), ContainerSubState.RUNNING);
- // the container is fully utilizing its resources
+ // the container is fully utilizing its resources, 2GBs out of its 2GBs of
+ // memory and 4 out of 4 vcores
setContainerResourceUtilization(
- ResourceUtilization.newInstance(2048, 0, 1.0f));
+ ResourceUtilization.newInstance(2048, 0, 4.0f));
// send two OPPORTUNISTIC container requests that are expected to be queued
containerManager.startContainers(StartContainersRequest.newInstance(
@@ -820,9 +833,11 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(2), ContainerSubState.SCHEDULED);
- // the containers utilization dropped to the overallocation threshold
+ // the containers memory utilization dropped to the overallocation threshold
+ // 0.75 * 2048 = 1536 MBs. The cpu utilization, 2 out of 4 vcores, is also
+ // below the cpu overallocation threshold.
setContainerResourceUtilization(
- ResourceUtilization.newInstance(1536, 0, 1.0f/2));
+ ResourceUtilization.newInstance(1536, 0, 2.0f));
// try to start opportunistic containers out of band.
((ContainerManagerForTest) containerManager)
@@ -838,13 +853,15 @@
}
});
- // the GUARANTEED container is completed releasing resources
+ // the GUARANTEED container is completed releasing resources, now the
+ // utilization is 100 MBs out of 2GBs of memory and 0.8 out of 4 vcores
setContainerResourceUtilization(
- ResourceUtilization.newInstance(100, 0, 1.0f/5));
+ ResourceUtilization.newInstance(100, 0, 0.8f));
// the containers utilization dropped way below the overallocation threshold
+ // 512 MBs < 2048 * 0.75 = 1536 MBs, 0.5 vcores < 4 vcores * 0.75 = 3 vcores
setContainerResourceUtilization(
- ResourceUtilization.newInstance(512, 0, 1.0f/8));
+ ResourceUtilization.newInstance(512, 0, 0.5f));
((ContainerManagerForTest) containerManager)
.checkNodeResourceUtilization();
@@ -923,9 +940,10 @@
createContainerId(3), ContainerSubState.RUNNING);
// the containers memory utilization is over the preemption threshold
- // (2048 > 2048 * 0.8 = 1638.4)
+ // (2048 > 2048 * 0.8 = 1638.4). the cpu utilization, 2 out of 4 vcores
+ // is below its preemption threshold
setContainerResourceUtilization(
- ResourceUtilization.newInstance(2048, 0, 0.5f));
+ ResourceUtilization.newInstance(2048, 0, 2.0f));
((ContainerManagerForTest) containerManager)
.checkNodeResourceUtilization();
@@ -988,9 +1006,9 @@
createContainerId(1), ContainerSubState.RUNNING);
// the node is being fully utilized, which is above the preemption
- // threshold (2048 * 0.75 = 1536 MB, 1.0f)
+ // threshold (2048 * 0.75 = 1536 MB, 4 * 0.75 = 3 vcores)
setContainerResourceUtilization(
- ResourceUtilization.newInstance(2048, 0, 1.0f));
+ ResourceUtilization.newInstance(2048, 0, 4.0f));
((ContainerManagerForTest) containerManager)
.checkNodeResourceUtilization();
@@ -1064,10 +1082,10 @@
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(3), ContainerSubState.RUNNING);
- final float fullCpuUtilization = 1.0f;
+ final float fullCpuUtilization = 1.0f * 4;
- // the containers CPU utilization is over its preemption threshold (0.8f)
- // for the first time
+ // the containers CPU utilization is over its preemption threshold
+ // (0.8f * 4 = 3.2 vcores) for the first time
setContainerResourceUtilization(
ResourceUtilization.newInstance(2048, 0, fullCpuUtilization));
((ContainerManagerForTest) containerManager)
@@ -1084,8 +1102,8 @@
}
});
- // the containers CPU utilization is over its preemption threshold (0.8f)
- // for the second time
+ // the containers CPU utilization is over its preemption threshold
+ // (0.8f * 4 = 3.2 vcores) for the second time
setContainerResourceUtilization(
ResourceUtilization.newInstance(2048, 0, fullCpuUtilization));
((ContainerManagerForTest) containerManager)
@@ -1103,8 +1121,8 @@
}
});
- // the containers CPU utilization is over the preemption threshold (0.8f)
- // for the third time
+ // the containers CPU utilization is over the preemption threshold
+ // (0.8f * 4 = 3.2 vcores) for the third time
setContainerResourceUtilization(
ResourceUtilization.newInstance(2048, 0, fullCpuUtilization));
((ContainerManagerForTest) containerManager)
@@ -1127,8 +1145,8 @@
});
// again, the containers CPU utilization is over the preemption threshold
- // (0.8f) for the first time (the cpu over-limit count is reset every time
- // a preemption is triggered)
+ // (0.8f * 4) for the first time (the cpu over-limit count is reset every
+ // time a preemption is triggered)
setContainerResourceUtilization(
ResourceUtilization.newInstance(2048, 0, fullCpuUtilization));
((ContainerManagerForTest) containerManager)
@@ -1145,8 +1163,8 @@
}
});
- // the containers CPU utilization is over the preemption threshold (0.9f)
- // for the second time
+ // the containers CPU utilization is over the preemption threshold
+ // (0.8f * 4 = 3.2 vcores) for the second time
setContainerResourceUtilization(
ResourceUtilization.newInstance(2048, 0, fullCpuUtilization));
((ContainerManagerForTest) containerManager)
@@ -1172,8 +1190,8 @@
// because CPU utilization is over its preemption threshold three times
// consecutively, the amount of cpu utilization over the preemption
- // threshold, that is, 1.0 - 0.8 = 0.2f CPU needs to be reclaimed and
- // as a result, the other OPPORTUNISTIC container should be killed
+ // threshold, that is, (1.0 - 0.8) * 4 = 0.8f CPU needs to be reclaimed
+ // and as a result, the other OPPORTUNISTIC container should be killed
BaseContainerManagerTest.waitForContainerSubState(containerManager,
createContainerId(1), ContainerSubState.DONE);
verifyContainerStatuses(new HashMap<ContainerId, ContainerSubState>() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPolicy.java
index 7900a61..9965f08 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPolicy.java
@@ -60,6 +60,14 @@
return (int) (Math.round(memoryUtilization * MEMORY_CAPACITY_BYTE) >> 20);
}
+ /**
+ * Get the amount of vcores used given a cpu utilization percentage.
+ * @param cpuUtilizationPercentage cpu utilization percentage in [0, 1.0]
+ */
+ private static float getVcores(float cpuUtilizationPercentage) {
+ return VCORE_CAPACITY * cpuUtilizationPercentage;
+ }
+
@Test
public void testNoVcoresAvailable() {
SnapshotBasedOverAllocationPolicy overAllocationPolicy =
@@ -69,7 +77,7 @@
// the current cpu utilization is over the threshold, 0.8
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(0, 0, 0.9f),
+ ResourceUtilization.newInstance(0, 0, getVcores(0.9f)),
System.currentTimeMillis()));
Resource available = overAllocationPolicy.getAvailableResources();
Assert.assertEquals(
@@ -105,7 +113,7 @@
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
ResourceUtilization.newInstance(
- getMemoryMBs(0.9), 0, 0.9f),
+ getMemoryMBs(0.9), 0, getVcores(0.9f)),
System.currentTimeMillis()));
Resource available = overAllocationPolicy.getAvailableResources();
Assert.assertEquals(
@@ -123,8 +131,8 @@
int memoryUtilizationMBs = getMemoryMBs(0.6);
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(memoryUtilizationMBs, 0, 0.6f),
- System.currentTimeMillis()));
+ ResourceUtilization.newInstance(memoryUtilizationMBs, 0,
+ getVcores(0.6f)), System.currentTimeMillis()));
Resource available = overAllocationPolicy.getAvailableResources();
Assert.assertEquals("Unexpected resources available for overallocation",
Resource.newInstance(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPreemptionPolicy.java
index bbc7c49..fc8a977 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPreemptionPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestSnapshotBasedOverAllocationPreemptionPolicy.java
@@ -47,9 +47,11 @@
@Before
public void setUp() {
- // the node has an allocation of 2048 MB of memory
+ // the node has an allocation of 2048 MB of memory and 4 vcores
when(containersMonitor.getPmemAllocatedForContainers()).
thenReturn(2048 * 1024 * 1024L);
+ when(containersMonitor.getVCoresAllocatedForContainers()).
+ thenReturn(4L);
}
/**
@@ -86,36 +88,36 @@
new SnapshotBasedOverAllocationPreemptionPolicy(PREEMPTION_THRESHOLDS,
MAX_CPU_OVER_PREEMPTION_THRESHOLDS, containersMonitor);
- // the current CPU utilization, 1.0f, is over the preemption threshold,
- // 0.75f, for the first time. The memory utilization, 1000 MB is below
+ // the current CPU utilization, 4.0f, is over the preemption threshold,
+ // 0.75f * 4, for the first time. The memory utilization, 1000 MB is below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed
Assert.assertEquals(
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 0.5f, is below the preemption threshold,
- // 0.75f. In the meantime the memory utilization, 1000 MB is also below
+ // the current CPU utilization, 2.0f, is below the preemption threshold,
+ // 0.75f * 4. In the meantime the memory utilization, 1000 MB is also below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 0.5f),
+ ResourceUtilization.newInstance(1000, 0, 2.0f),
Time.now()));
// no resources shall be reclaimed
Assert.assertEquals(
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is over the preemption threshold,
- // 0.75f. In the meantime the memory utilization, 1000 MB is below
+ // the current CPU utilization, 4.0f, is over the preemption threshold,
+ // 0.75f * 4. In the meantime the memory utilization, 1000 MB is below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed because the cpu utilization is allowed
// to go over the preemption threshold at most two times in a row. It is
@@ -124,12 +126,12 @@
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is again over the preemption
- // threshold, 0.75f. In the meantime the memory utilization, 1000 MB
+ // the current CPU utilization, 4.0f, is again over the preemption
+ // threshold, 0.75f * 4. In the meantime the memory utilization, 1000 MB
// is below the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed because the cpu utilization is allowed
// to go over the preemption threshold at most two times in a row. It is
@@ -138,17 +140,17 @@
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is over the preemption threshold,
+ // the current CPU utilization, 4.0f, is over the preemption threshold,
// the third time in a row. In the meantime the memory utilization, 1000 MB
// is below the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// the amount of cpu utilization over the preemption threshold, that is,
- // 1.0 - 0.75f = 0.25, shall be reclaimed.
+ // (1.0 - 0.75)*4 = 1.0f, shall be reclaimed.
Assert.assertEquals(
- ResourceUtilization.newInstance(0, 0, 0.25f),
+ ResourceUtilization.newInstance(0, 0, 1.0f),
preemptionPolicy.getResourcesToReclaim());
}
@@ -162,12 +164,12 @@
new SnapshotBasedOverAllocationPreemptionPolicy(PREEMPTION_THRESHOLDS,
MAX_CPU_OVER_PREEMPTION_THRESHOLDS, containersMonitor);
- // the current CPU utilization, 1.0f, is over the preemption threshold,
- // 0.75f, for the first time. The memory utilization, 1000 MB is below
+ // the current CPU utilization, 4.0f, is over the preemption threshold,
+ // 0.75f * 4, for the first time. The memory utilization, 1000 MB is below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed because the cpu utilization is allowed
// to go over the preemption threshold at most two times in a row. It is
@@ -176,12 +178,12 @@
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 0.5f, is below the preemption threshold,
- // 0.75f. The memory utilization, 2000 MB, however, is above the memory
+ // the current CPU utilization, 2.0f, is below the preemption threshold,
+ // 0.75f * 4. The memory utilization, 2000 MB, however, is above the memory
// preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(2000, 0, 0.5f),
+ ResourceUtilization.newInstance(2000, 0, 2.0f),
Time.now()));
// the amount of memory utilization over the preemption threshold, that is,
// 2000 - (2048 * 0.75) = 464 MB of memory, shall be reclaimed.
@@ -189,12 +191,12 @@
ResourceUtilization.newInstance(464, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is over the preemption threshold,
- // 0.75f, for the first time. The memory utilization, 1000 MB is below
+ // the current CPU utilization, 4.0f, is over the preemption threshold,
+ // 0.75f * 4, for the first time. The memory utilization, 1000 MB is below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed because the cpu utilization is allowed
// to go over the preemption threshold at most two times in a row. It is
@@ -203,12 +205,12 @@
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is again over the preemption
- // threshold, 0.75f. In the meantime the memory utilization, 1000 MB
+ // the current CPU utilization, 4.0f, is again over the preemption
+ // threshold, 0.75f * 4. In the meantime the memory utilization, 1000 MB
// is still below the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 1.0f),
+ ResourceUtilization.newInstance(1000, 0, 4.0f),
Time.now()));
// no resources shall be reclaimed because the cpu utilization is allowed
// to go over the preemption threshold at most two times in a row. It is
@@ -217,20 +219,20 @@
ResourceUtilization.newInstance(0, 0, 0.0f),
preemptionPolicy.getResourcesToReclaim());
- // the current CPU utilization, 1.0f, is over the CPU preemption threshold,
- // 0.75f, the third time in a row. In the meantime, the memory utilization,
- // 2000 MB, is also over the memory preemption threshold,
+ // the current CPU utilization, 4.0f, is over the CPU preemption threshold,
+ // 0.75f * 4, the third time in a row. In the meantime, the memory
+ // utilization, 2000 MB, is also over the memory preemption threshold,
// 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(2000, 0, 1.0f),
+ ResourceUtilization.newInstance(2000, 0, 4.0f),
Time.now()));
// the amount of memory utilization over the preemption threshold, that is,
// 2000 - (2048 * 0.75) = 464 MB of memory, and the amount of cpu
- // utilization over the preemption threshold, that is, 1.0f - 0.75f = 0.25f,
+ // utilization over the preemption threshold, that is, 4.0 - 0.75f*4 = 1.0f,
// shall be reclaimed.
Assert.assertEquals(
- ResourceUtilization.newInstance(464, 0, 0.25f),
+ ResourceUtilization.newInstance(464, 0, 1.0f),
preemptionPolicy.getResourcesToReclaim());
}
@@ -243,12 +245,12 @@
new SnapshotBasedOverAllocationPreemptionPolicy(PREEMPTION_THRESHOLDS,
MAX_CPU_OVER_PREEMPTION_THRESHOLDS, containersMonitor);
- // the current CPU utilization, 0.5f, is below the preemption threshold,
+ // the current CPU utilization, 2.0f, is below the preemption threshold,
// 0.75f. In the meantime the memory utilization, 1000 MB is also below
// the memory preemption threshold, 2048 * 0.75 = 1536 MB.
when(containersMonitor.getContainersUtilization(anyBoolean())).thenReturn(
new ContainersMonitor.ContainersResourceUtilization(
- ResourceUtilization.newInstance(1000, 0, 0.5f),
+ ResourceUtilization.newInstance(1000, 0, 2.0f),
Time.now()));
// no resources shall be reclaimed because both CPU and memory utilization
// are under the preemption threshold
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
index 8d47f34..8074114 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
@@ -744,8 +744,7 @@
// account for resources allocated in this heartbeat
projectedNodeUtilization.addTo(
(int) (resourceAllocatedPendingLaunch.getMemorySize()), 0,
- (float) resourceAllocatedPendingLaunch.getVirtualCores() /
- capacity.getVirtualCores());
+ (float) resourceAllocatedPendingLaunch.getVirtualCores());
ResourceThresholds thresholds =
overAllocationInfo.getOverAllocationThresholds();
@@ -756,7 +755,7 @@
- projectedNodeUtilization.getPhysicalMemory());
int allowedCpu = Math.max(0, (int)
(overAllocationThreshold.getVirtualCores() -
- projectedNodeUtilization.getCPU() * capacity.getVirtualCores()));
+ projectedNodeUtilization.getCPU()));
Resource resourceAllowedForOpportunisticContainers =
Resources.createResource(allowedMemory, allowedCpu);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
index a0522d3..5b2e00c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
@@ -2894,11 +2894,11 @@
}
/**
- * Test that NO OPPORTUNISTIC containers can be allocated on a node that
- * is fully allocated and with a very high utilization.
+ * Test that NO OPPORTUNISTIC containers can be allocated on a node where
+ * the memory is fully allocated and with a very high utilization.
*/
@Test
- public void testAllocateNoOpportunisticContainersOnBusyNode()
+ public void testAllocateNoOpportunisticContainersOnMemoryBusyNode()
throws IOException {
conf.setBoolean(
YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED, true);
@@ -2918,7 +2918,7 @@
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
- // Add a node with 4G of memory and 4 vcores and an overallocation
+ // Add a node with 2G of memory and 2 vcores and an overallocation
// threshold of 0.75f and 0.75f for memory and cpu respectively
OverAllocationInfo overAllocationInfo = OverAllocationInfo.newInstance(
ResourceThresholds.newInstance(0.75f, 0.75f));
@@ -2928,7 +2928,7 @@
// create a scheduling request that takes up the node's full memory
ApplicationAttemptId appAttempt1 =
- createSchedulingRequest(2048, "queue1", "user1", 1);
+ createSchedulingRequest(2048, 1, "queue1", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(2048, scheduler.getQueueManager().getQueue("queue1").
getGuaranteedResourceUsage().getMemorySize());
@@ -2939,18 +2939,18 @@
ExecutionType.GUARANTEED,
allocatedContainers1.get(0).getExecutionType());
- // node utilization shoots up after the container runs on the node
+ // memory utilization shoots up after the container runs on the node
ContainerStatus containerStatus = ContainerStatus.newInstance(
allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
ContainerExitStatus.SUCCESS);
node.updateContainersInfoAndUtilization(
new UpdatedContainerInfo(Collections.singletonList(containerStatus),
Collections.emptyList()),
- ResourceUtilization.newInstance(2000, 0, 0.8f));
+ ResourceUtilization.newInstance(2000, 0, 0.0f));
// create another scheduling request
ApplicationAttemptId appAttempt2
- = createSchedulingRequest(100, "queue2", "user1", 1);
+ = createSchedulingRequest(100, 1, "queue2", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
List<Container> allocatedContainers2 =
scheduler.getSchedulerApp(appAttempt2).pullNewlyAllocatedContainers();
@@ -2976,12 +2976,98 @@
}
/**
- * Test that OPPORTUNISTIC containers can be allocated on a node with low
- * utilization even though there is not enough unallocated resource on the
- * node to accommodate the request.
+ * Test that NO OPPORTUNISTIC containers can be allocated on a node where
+   * the CPU is fully allocated and with a very high utilization.
*/
@Test
- public void testAllocateOpportunisticContainersOnPartiallyOverAllocatedNode()
+ public void testAllocateNoOpportunisticContainersOnCPUBusyNode()
+ throws Exception {
+ conf.setBoolean(
+ YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED, true);
+ // disable resource request normalization in fair scheduler
+ int memoryAllocationIncrement = conf.getInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ FairSchedulerConfiguration.
+ DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 1);
+ int memoryAllocationMinimum = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1);
+ try {
+ scheduler.init(conf);
+ scheduler.start();
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ // Add a node with 4G of memory and 4 vcores and an overallocation
+ // threshold of 0.75f and 0.75f for memory and cpu respectively
+ OverAllocationInfo overAllocationInfo = OverAllocationInfo.newInstance(
+ ResourceThresholds.newInstance(0.75f, 0.75f));
+ MockNodes.MockRMNodeImpl node = MockNodes.newNodeInfo(1,
+ Resources.createResource(4096, 4), overAllocationInfo);
+ scheduler.handle(new NodeAddedSchedulerEvent(node));
+
+ // create a scheduling request that takes up the node's full CPU
+ ApplicationAttemptId appAttempt1 =
+ createSchedulingRequest(1024, 4, "queue1", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+
+ List<Container> allocatedContainers1 =
+ scheduler.getSchedulerApp(appAttempt1).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers1.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.GUARANTEED,
+ allocatedContainers1.get(0).getExecutionType());
+ assertEquals(4, scheduler.getQueueManager().getQueue("queue1").
+ getGuaranteedResourceUsage().getVirtualCores());
+
+      // cpu utilization shoots up after the container runs on the node
+ ContainerStatus containerStatus = ContainerStatus.newInstance(
+ allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
+ ContainerExitStatus.SUCCESS);
+ node.updateContainersInfoAndUtilization(
+ new UpdatedContainerInfo(Collections.singletonList(containerStatus),
+ Collections.emptyList()),
+ ResourceUtilization.newInstance(0, 0, 3.5f));
+
+ // create another scheduling request that should NOT be satisfied
+ // immediately because the node cpu utilization is over its
+ // overallocation threshold
+ ApplicationAttemptId appAttempt3
+ = createSchedulingRequest(1024, 1, "queue2", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ List<Container> allocatedContainers3 =
+ scheduler.getSchedulerApp(appAttempt3).pullNewlyAllocatedContainers();
+ assertTrue("Expecting no containers allocated",
+ allocatedContainers3.size() == 0);
+ assertEquals(0, scheduler.getQueueManager().getQueue("queue2").
+ getOpportunisticResourceUsage().getVirtualCores());
+
+ // verify that a reservation is made for the second resource request
+ Resource reserved = scheduler.getNode(node.getNodeID()).
+ getReservedContainer().getReservedResource();
+ assertTrue("Expect a reservation made for the second resource request",
+ reserved.equals(Resource.newInstance(1024, 1)));
+ } finally {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ false);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ memoryAllocationMinimum);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ memoryAllocationIncrement);
+ }
+ }
+
+ /**
+ * Test that OPPORTUNISTIC containers can be allocated on a node with low
+ * memory utilization even though there is not enough unallocated resource
+ * on the node to accommodate the request.
+ */
+ @Test
+ public void
+ testAllocateOpportunisticContainersOnMemoryPartiallyOverAllocatedNode()
throws IOException {
conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
true);
@@ -3010,9 +3096,9 @@
Resources.createResource(4096, 4), overAllocationInfo);
scheduler.handle(new NodeAddedSchedulerEvent(node));
- // create a scheduling request that leaves some unallocated resources
+ // create a scheduling request that leaves some unallocated memory
ApplicationAttemptId appAttempt1 =
- createSchedulingRequest(3600, "queue1", "user1", 1);
+ createSchedulingRequest(3600, 1, "queue1", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(3600, scheduler.getQueueManager().getQueue("queue1").
getGuaranteedResourceUsage().getMemorySize());
@@ -3023,19 +3109,19 @@
ExecutionType.GUARANTEED,
allocatedContainers1.get(0).getExecutionType());
- // node utilization is low after the container is launched on the node
+ // memory utilization is low after the container is launched on the node
ContainerStatus containerStatus = ContainerStatus.newInstance(
allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
ContainerExitStatus.SUCCESS);
node.updateContainersInfoAndUtilization(
new UpdatedContainerInfo(Collections.singletonList(containerStatus),
Collections.emptyList()),
- ResourceUtilization.newInstance(1800, 0, 0.5f));
+ ResourceUtilization.newInstance(1800, 0, 0.0f));
- // create another scheduling request that asks for more than what's left
+ // create another scheduling request that asks for more than the memory
// unallocated on the node but can be served with overallocation.
ApplicationAttemptId appAttempt2 =
- createSchedulingRequest(1024, "queue2", "user1", 1);
+ createSchedulingRequest(1024, 1, "queue2", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(1024, scheduler.getQueueManager().getQueue("queue2").
getOpportunisticResourceUsage().getMemorySize());
@@ -3063,11 +3149,13 @@
}
/**
- * Test opportunistic containers can be allocated on a node that is fully
- * allocated but whose utilization is very low.
+ * Test that OPPORTUNISTIC containers can be allocated on a node with low
+ * cpu utilization even though there is not enough unallocated resource
+ * on the node to accommodate the request.
*/
@Test
- public void testAllocateOpportunisticContainersOnFullyAllocatedNode()
+ public void
+ testAllocateOpportunisticContainersOnCPUPartiallyOverAllocatedNode()
throws IOException {
conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
true);
@@ -3096,9 +3184,94 @@
Resources.createResource(4096, 4), overAllocationInfo);
scheduler.handle(new NodeAddedSchedulerEvent(node));
- // create a scheduling request that takes up the whole node
+ // create a scheduling request that leaves some unallocated CPU resources
+ ApplicationAttemptId appAttempt1 =
+ createSchedulingRequest(1024, 3, "queue1", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(3, scheduler.getQueueManager().getQueue("queue1").
+ getGuaranteedResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers1 =
+ scheduler.getSchedulerApp(appAttempt1).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers1.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.GUARANTEED,
+ allocatedContainers1.get(0).getExecutionType());
+
+ // cpu utilization is low after the container is launched on the node
+ ContainerStatus containerStatus = ContainerStatus.newInstance(
+ allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
+ ContainerExitStatus.SUCCESS);
+ node.updateContainersInfoAndUtilization(
+ new UpdatedContainerInfo(Collections.singletonList(containerStatus),
+ Collections.emptyList()),
+ ResourceUtilization.newInstance(0, 0, 1.0f));
+
+ // create another scheduling request that asks for more than what's left
+ // unallocated on the node but can be served with overallocation.
+ ApplicationAttemptId appAttempt2 =
+ createSchedulingRequest(1024, 2, "queue2", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(2, scheduler.getQueueManager().getQueue("queue2").
+ getOpportunisticResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers2 =
+ scheduler.getSchedulerApp(appAttempt2).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers2.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.OPPORTUNISTIC,
+ allocatedContainers2.get(0).getExecutionType());
+
+ // verify that no reservation is made for the second request given
+ // that it's satisfied by an OPPORTUNISTIC container allocation.
+ assertTrue("No reservation should be made because we have satisfied" +
+ " the second request with an OPPORTUNISTIC container allocation",
+ scheduler.getNode(node.getNodeID()).getReservedContainer() == null);
+ } finally {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ false);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ memoryAllocationMinimum);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ memoryAllocationIncrement);
+ }
+ }
+ /**
+ * Test opportunistic containers can be allocated on a node where the memory
+ * is fully allocated but whose utilization is very low.
+ */
+ @Test
+ public void testAllocateOpportunisticContainersOnMemoryFullyAllocatedNode()
+ throws IOException {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ true);
+ // disable resource request normalization in fair scheduler
+ int memoryAllocationIncrement = conf.getInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ FairSchedulerConfiguration.
+ DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 1);
+ int memoryAllocationMinimum = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1);
+
+ try {
+ scheduler.init(conf);
+ scheduler.start();
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ // Add a node with 4G of memory and 4 vcores and an overallocation
+ // threshold of 0.75f and 0.75f for memory and cpu respectively
+ OverAllocationInfo overAllocationInfo = OverAllocationInfo.newInstance(
+ ResourceThresholds.newInstance(0.75f, 0.75f));
+ MockNodes.MockRMNodeImpl node = MockNodes.newNodeInfo(1,
+ Resources.createResource(4096, 4), overAllocationInfo);
+ scheduler.handle(new NodeAddedSchedulerEvent(node));
+
+ // create a scheduling request that takes up all memory
ApplicationAttemptId appAttempt1 = createSchedulingRequest(
- 4096, "queue1", "user1", 4);
+ 4096, 1, "queue1", "user1", 4);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(4096, scheduler.getQueueManager().getQueue("queue1").
getGuaranteedResourceUsage().getMemorySize());
@@ -3109,20 +3282,20 @@
ExecutionType.GUARANTEED,
allocatedContainers1.get(0).getExecutionType());
- // node utilization is low after the container is launched on the node
+ // memory utilization is low after the container is launched on the node
ContainerStatus containerStatus = ContainerStatus.newInstance(
allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
ContainerExitStatus.SUCCESS);
node.updateContainersInfoAndUtilization(
new UpdatedContainerInfo(Collections.singletonList(containerStatus),
Collections.emptyList()),
- ResourceUtilization.newInstance(1800, 0, 0.5f));
+ ResourceUtilization.newInstance(1800, 0, 0.0f));
// create another scheduling request now that there is no unallocated
- // resources left on the node, the request should be served with an
+ // memory resources left on the node, the request should be served with an
// allocation of an opportunistic container
ApplicationAttemptId appAttempt2 = createSchedulingRequest(
- 1024, "queue2", "user1", 1);
+ 1024, 1, "queue2", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(1024, scheduler.getQueueManager().getQueue("queue2").
getOpportunisticResourceUsage().getMemorySize());
@@ -3150,6 +3323,93 @@
}
/**
+ * Test opportunistic containers can be allocated on a node where the CPU
+ * is fully allocated but whose utilization is very low.
+ */
+ @Test
+ public void testAllocateOpportunisticContainersOnCPUFullyAllocatedNode()
+ throws IOException {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ true);
+ // disable resource request normalization in fair scheduler
+ int memoryAllocationIncrement = conf.getInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ FairSchedulerConfiguration.
+ DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 1);
+ int memoryAllocationMinimum = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1);
+
+ try {
+ scheduler.init(conf);
+ scheduler.start();
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ // Add a node with 4G of memory and 4 vcores and an overallocation
+ // threshold of 0.75f and 0.75f for memory and cpu respectively
+ OverAllocationInfo overAllocationInfo = OverAllocationInfo.newInstance(
+ ResourceThresholds.newInstance(0.75f, 0.75f));
+ MockNodes.MockRMNodeImpl node = MockNodes.newNodeInfo(1,
+ Resources.createResource(4096, 4), overAllocationInfo);
+ scheduler.handle(new NodeAddedSchedulerEvent(node));
+
+ // create a scheduling request that takes up all vcores
+ ApplicationAttemptId appAttempt1 = createSchedulingRequest(
+ 1024, 4, "queue1", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(4, scheduler.getQueueManager().getQueue("queue1").
+ getGuaranteedResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers1 =
+ scheduler.getSchedulerApp(appAttempt1).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers1.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.GUARANTEED,
+ allocatedContainers1.get(0).getExecutionType());
+
+ // cpu utilization is low after the container is launched on the node
+ ContainerStatus containerStatus = ContainerStatus.newInstance(
+ allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
+ ContainerExitStatus.SUCCESS);
+ node.updateContainersInfoAndUtilization(
+ new UpdatedContainerInfo(Collections.singletonList(containerStatus),
+ Collections.emptyList()),
+ ResourceUtilization.newInstance(0, 0, 1.0f));
+
+ // create another scheduling request now that there is no unallocated
+ // cpu resources left on the node, the request should be served with an
+ // allocation of an opportunistic container
+ ApplicationAttemptId appAttempt2 = createSchedulingRequest(
+ 1024, 2, "queue2", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(2, scheduler.getQueueManager().getQueue("queue2").
+ getOpportunisticResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers2 =
+ scheduler.getSchedulerApp(appAttempt2).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers2.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.OPPORTUNISTIC,
+ allocatedContainers2.get(0).getExecutionType());
+
+ // verify that no reservation is made for the second request given
+ // that it's satisfied by an OPPORTUNISTIC container allocation.
+ assertTrue("No reservation should be made because we have satisfied" +
+ " the second request with an OPPORTUNISTIC container allocation",
+ scheduler.getNode(node.getNodeID()).getReservedContainer() == null);
+ } finally {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ false);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ memoryAllocationMinimum);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ memoryAllocationIncrement);
+ }
+ }
+
+ /**
* Test opportunistic containers can be allocated on a node with a low
* utilization even though there are GUARANTEED containers allocated.
*/
@@ -3277,7 +3537,7 @@
* @throws Exception
*/
@Test
- public void testMaxOverallocationPerNode() throws Exception {
+ public void testMaxMemoryOverallocationPerNode() throws Exception {
conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
true);
// disable resource request normalization in fair scheduler
@@ -3309,9 +3569,9 @@
Resources.createResource(1024, 1), overAllocationInfo);
scheduler.handle(new NodeAddedSchedulerEvent(node));
- // create a scheduling request that takes up the whole node
+ // create a scheduling request that takes up all memory on the node
ApplicationAttemptId appAttempt1 =
- createSchedulingRequest(1024, "queue1", "user1", 1);
+ createSchedulingRequest(1024, 1, "queue1", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
getGuaranteedResourceUsage().getMemorySize());
@@ -3332,7 +3592,7 @@
ResourceUtilization.newInstance(0, 0, 0.0f));
// create a scheduling request that should get allocated an OPPORTUNISTIC
- // container because the node utilization is zero
+ // container because the memory utilization is zero
ApplicationAttemptId appAttempt2 =
createSchedulingRequest(1024, "queue2", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
@@ -3355,7 +3615,7 @@
ResourceUtilization.newInstance(0, 0, 0.0f));
// create another scheduling request that should not get any allocation
- // because of the max overallocation on the node will be exceeded.
+    // because the max memory overallocation on the node will be exceeded.
ApplicationAttemptId appAttempt3 =
createSchedulingRequest(1024, "queue3", "user1", 1);
scheduler.handle(new NodeUpdateSchedulerEvent(node));
@@ -3380,6 +3640,112 @@
}
/**
+ * Test that max CPU overallocation per node is enforced by Fair Scheduler.
+ */
+ @Test
+ public void testMaxCPUOverallocationPerNode() throws Exception {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ true);
+ // disable resource request normalization in fair scheduler
+ int memoryAllocationIncrement = conf.getInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ FairSchedulerConfiguration.
+ DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 1);
+ int memoryAllocationMinimum = conf.getInt(
+ YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1);
+ float maxOverallocationRatio = conf.getFloat(
+ YarnConfiguration.PER_NODE_MAX_OVERALLOCATION_RATIO,
+ YarnConfiguration.DEFAULT_PER_NODE_MAX_OVERALLOCATION_RATIO);
+ conf.setFloat(YarnConfiguration.PER_NODE_MAX_OVERALLOCATION_RATIO, 1.0f);
+
+ try {
+ scheduler.init(conf);
+ scheduler.start();
+ scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+ // Add a node with 4G of memory and 2 vcores and an overallocation
+ // threshold of 1.0f and 1.0f for memory and cpu respectively
+ OverAllocationInfo overAllocationInfo = OverAllocationInfo.newInstance(
+ ResourceThresholds.newInstance(1f, 1f));
+ MockNodes.MockRMNodeImpl node = MockNodes.newNodeInfo(1,
+ Resources.createResource(4096, 2), overAllocationInfo);
+ scheduler.handle(new NodeAddedSchedulerEvent(node));
+
+ // create a scheduling request that takes up all CPU on the node
+ ApplicationAttemptId appAttempt1 =
+ createSchedulingRequest(1024, 2, "queue1", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(2, scheduler.getQueueManager().getQueue("queue1").
+ getGuaranteedResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers1 =
+ scheduler.getSchedulerApp(appAttempt1).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers1.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.GUARANTEED,
+ allocatedContainers1.get(0).getExecutionType());
+
+ // cpu utilization is zero after the container runs
+ ContainerStatus containerStatus1 = ContainerStatus.newInstance(
+ allocatedContainers1.get(0).getId(), ContainerState.RUNNING, "",
+ ContainerExitStatus.SUCCESS);
+ node.updateContainersInfoAndUtilization(
+ new UpdatedContainerInfo(Collections.singletonList(containerStatus1),
+ Collections.emptyList()),
+ ResourceUtilization.newInstance(0, 0, 0.0f));
+
+ // create a scheduling request that should get allocated an OPPORTUNISTIC
+ // container because the cpu utilization is zero
+ ApplicationAttemptId appAttempt2 =
+ createSchedulingRequest(1024, 2, "queue2", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ List<Container> allocatedContainers2 =
+ scheduler.getSchedulerApp(appAttempt2).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers2.size() == 1);
+ assertEquals("unexpected container execution type",
+ ExecutionType.OPPORTUNISTIC,
+ allocatedContainers2.get(0).getExecutionType());
+ assertEquals(2, scheduler.getQueueManager().getQueue("queue2").
+ getOpportunisticResourceUsage().getVirtualCores());
+
+ // node utilization is still zero after the container runs
+ ContainerStatus containerStatus2 = ContainerStatus.newInstance(
+ allocatedContainers2.get(0).getId(), ContainerState.RUNNING, "",
+ ContainerExitStatus.SUCCESS);
+ node.updateContainersInfoAndUtilization(
+ new UpdatedContainerInfo(Collections.singletonList(containerStatus2),
+ Collections.emptyList()),
+ ResourceUtilization.newInstance(0, 0, 0.0f));
+
+ // create another scheduling request that should not get any allocation
+ // because of the max CPU overallocation on the node will be exceeded.
+ ApplicationAttemptId appAttempt3 =
+ createSchedulingRequest(1024, 1, "queue3", "user1", 1);
+ scheduler.handle(new NodeUpdateSchedulerEvent(node));
+ assertEquals(0, scheduler.getQueueManager().getQueue("queue3").
+ getOpportunisticResourceUsage().getVirtualCores());
+ List<Container> allocatedContainers3 =
+ scheduler.getSchedulerApp(appAttempt3).pullNewlyAllocatedContainers();
+ assertTrue(allocatedContainers3.size() == 0);
+ assertEquals(0, scheduler.getQueueManager().getQueue("queue3").
+ getOpportunisticResourceUsage().getVirtualCores());
+ } finally {
+ conf.setBoolean(YarnConfiguration.RM_SCHEDULER_OVERSUBSCRIPTION_ENABLED,
+ false);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+ memoryAllocationMinimum);
+ conf.setInt(
+ FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB,
+ memoryAllocationIncrement);
+ conf.setFloat(YarnConfiguration.PER_NODE_MAX_OVERALLOCATION_RATIO,
+ maxOverallocationRatio);
+ }
+ }
+
+ /**
* Test promotion of a single OPPORTUNISTIC container when no resources are
* reserved on the node where the container is allocated.
*/