SLIDER-586: negative node count checks on AM
diff --git a/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java b/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java
index d08b33a..bb9f430 100644
--- a/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java
+++ b/slider-core/src/main/java/org/apache/slider/core/exceptions/TriggerClusterTeardownException.java
@@ -29,8 +29,7 @@
private final FinalApplicationStatus finalApplicationStatus;
public TriggerClusterTeardownException(int code,
- String message,
- FinalApplicationStatus finalApplicationStatus,
+ FinalApplicationStatus finalApplicationStatus, String message,
Object... args) {
super(code, message, args);
this.finalApplicationStatus = finalApplicationStatus;
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
index 125746d..5ce0a78 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
@@ -150,7 +150,7 @@
int instances = mapOperations.getOptionInt(COMPONENT_INSTANCES, 0);
if (instances < 0) {
throw new BadClusterStateException(
- "Component %s has invalid instance count: %d",
+ "Component %s has negative instance count: %d",
mapOperations.name,
instances);
}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 31658bc..a69a60d 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -678,11 +678,10 @@
String role = roleStatus.getName();
MapOperations comp =
resources.getComponent(role);
- int desiredInstanceCount =
- resources.getComponentOptInt(role, ResourceKeys.COMPONENT_INSTANCES, 0);
+ int desiredInstanceCount = getDesiredInstanceCount(resources, role);
if (desiredInstanceCount == 0) {
- log.warn("Role {} has 0 instances specified", role);
- }
+ log.info("Role {} has 0 instances specified", role);
+ }
if (currentDesired != desiredInstanceCount) {
log.info("Role {} flexed from {} to {}", role, currentDesired,
desiredInstanceCount);
@@ -698,13 +697,36 @@
log.info("Adding new role {}", name);
ProviderRole dynamicRole = createDynamicProviderRole(name,
resources.getComponent(name));
- buildRole(dynamicRole);
+ RoleStatus roleStatus = buildRole(dynamicRole);
+ roleStatus.setDesired(getDesiredInstanceCount(resources, name));
roleHistory.addNewProviderRole(dynamicRole);
}
}
}
/**
+ * Get the desired instance count of a role, rejecting negative values
+ * @param resources resource map
+ * @param role role name
+ * @return the instance count
+ * @throws BadConfigException if the count is negative
+ */
+ private int getDesiredInstanceCount(ConfTreeOperations resources,
+ String role) throws BadConfigException {
+ int desiredInstanceCount =
+ resources.getComponentOptInt(role, ResourceKeys.COMPONENT_INSTANCES, 0);
+
+ if (desiredInstanceCount < 0) {
+ log.error("Role {} has negative desired instances : {}", role,
+ desiredInstanceCount);
+ throw new BadConfigException(
+ "Negative instance count (%) requested for component %s",
+ desiredInstanceCount, role);
+ }
+ return desiredInstanceCount;
+ }
+
+ /**
* Add knowledge of a role.
* This is a build-time operation that is not synchronized, and
* should be used while setting up the system state -before servicing
@@ -1592,10 +1614,9 @@
if (failures > threshold) {
throw new TriggerClusterTeardownException(
SliderExitCodes.EXIT_DEPLOYMENT_FAILED,
- ErrorStrings.E_UNSTABLE_CLUSTER +
+ FinalApplicationStatus.FAILED, ErrorStrings.E_UNSTABLE_CLUSTER +
" - failed with component %s failing %d times (%d in startup);" +
" threshold is %d - last failure: %s",
- FinalApplicationStatus.FAILED,
role.getName(),
role.getFailed(),
role.getStartFailed(),
@@ -1651,9 +1672,18 @@
expected = role.getDesired();
}
- log.info("Reviewing {}", role);
+ log.info("Reviewing {} : expected {}", role, expected);
checkFailureThreshold(role);
+ if (expected < 0 ) {
+ // negative value: fail
+ throw new TriggerClusterTeardownException(
+ SliderExitCodes.EXIT_DEPLOYMENT_FAILED,
+ FinalApplicationStatus.FAILED,
+ "Negative component count of %d desired for component %s",
+ expected, role);
+ }
+
if (delta > 0) {
log.info("{}: Asking for {} more nodes(s) for a total of {} ", name,
delta, expected);
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
index 7786c41..c2ea54a 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
@@ -140,7 +140,7 @@
}
/**
- * Create an AM without a master
+ * Create a standalone AM
* @param clustername AM name
* @param size # of nodes
* @param deleteExistingData should any existing cluster data be deleted
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
index f67ee92..f40d5a7 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
@@ -26,6 +26,7 @@
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.core.exceptions.BadClusterStateException
import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Before
import org.junit.Test
import static org.apache.slider.common.params.Arguments.*
@@ -38,6 +39,28 @@
@Slf4j
class TestAgentEcho extends AgentTestBase {
+ File slider_core
+ String echo_py
+ File echo_py_path
+ File app_def_path
+ String agt_ver
+ File agt_ver_path
+ String agt_conf
+ File agt_conf_path
+
+ @Before
+ public void setupArtifacts() {
+ slider_core = new File(new File(".").absoluteFile, "src/test/python");
+ echo_py = "echo.py"
+ echo_py_path = new File(slider_core, echo_py)
+ app_def_path = new File(app_def_pkg_path)
+ agt_ver = "version"
+ agt_ver_path = new File(slider_core, agt_ver)
+ agt_conf = "agent.ini"
+ agt_conf_path = new File(slider_core, agt_conf)
+
+ }
+
@Override
void checkTestAssumptions(YarnConfiguration conf) {
@@ -53,14 +76,6 @@
true,
false)
- File slider_core = new File(new File(".").absoluteFile, "src/test/python");
- String echo_py = "echo.py"
- File echo_py_path = new File(slider_core, echo_py)
- File app_def_path = new File(app_def_pkg_path)
- String agt_ver = "version"
- File agt_ver_path = new File(slider_core, agt_ver)
- String agt_conf = "agent.ini"
- File agt_conf_path = new File(slider_core, agt_conf)
assert echo_py_path.exists()
assert app_def_path.exists()
assert agt_ver_path.exists()
@@ -102,7 +117,7 @@
sliderClient.flex(clustername, [(role): -1]);
fail("expected an exception")
} catch (BadClusterStateException e) {
- assertExceptionDetails(e, SliderExitCodes.EXIT_BAD_STATE, "")
+ assertExceptionDetails(e, SliderExitCodes.EXIT_BAD_STATE, "negative")
}
}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
index a7bf068..1db500b 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
@@ -20,6 +20,7 @@
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.Container
+import org.apache.slider.core.exceptions.TriggerClusterTeardownException
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.operations.AbstractRMOperation
@@ -112,5 +113,28 @@
}
+ @Test
+ public void testFlexNegative() throws Throwable {
+ int r0 = 6
+ int r1 = 0
+ int r2 = 0
+ role0Status.desired = r0
+ role1Status.desired = r1
+ role2Status.desired = r2
+ List<RoleInstance> instances = createAndStartNodes()
+
+ int clusterSize = r0 + r1 + r2
+ assert instances.size() == clusterSize
+ log.info("shrinking cluster")
+ role0Status.desired = -2
+ List<AppState.NodeCompletionResult> completionResults = []
+ try {
+ createStartAndStopNodes(completionResults)
+ fail("expected an exception")
+ } catch (TriggerClusterTeardownException e) {
+ }
+
+ }
+
}