SLIDER-570 handling of launch failures
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 7b50c60..7928642 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -30,12 +30,15 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.slider.api.StatusKeys
import org.apache.slider.common.tools.ConfigHelper
+import org.apache.slider.core.exceptions.SliderException
+import org.apache.slider.core.launch.SerializedApplicationReport
import org.apache.slider.core.main.ServiceLauncher
import org.apache.slider.common.SliderKeys
import org.apache.slider.common.SliderXmlConfKeys
import org.apache.slider.api.ClusterDescription
import org.apache.slider.common.tools.SliderUtils
import org.apache.slider.client.SliderClient
+import org.apache.slider.core.persist.ApplicationReportSerDeser
import org.apache.slider.test.SliderTestUtils
import org.junit.Before
import org.junit.BeforeClass
@@ -369,6 +372,20 @@
slider(cmd)
}
+  /**
+   * Run the lookup action against an application ID.
+   * @param result value handed to slider() as its result argument
+   *               (presumably the expected exit code -- confirm against slider())
+   * @param id application ID; must be non-empty
+   * @param out optional file for the command output; when null no
+   *            output argument is added to the command line
+   * @return the shell returned by slider()
+   */
+  static SliderShell lookup(int result, String id, File out) {
+    assert id
+    def args = [ACTION_LOOKUP, ARG_ID, id]
+    if (out) {
+      args.addAll([ARG_OUTPUT, out.absolutePath])
+    }
+    return slider(result, args)
+  }
+
+  /**
+   * Run the lookup action against an application ID without
+   * supplying a result argument to slider().
+   * @param id application ID; must be non-empty
+   * @param out optional file for the command output; when null no
+   *            output argument is added to the command line
+   * @return the shell returned by slider()
+   */
+  static SliderShell lookup(String id, File out) {
+    assert id
+    def args = [ACTION_LOOKUP, ARG_ID, id]
+    if (out) {
+      args.addAll([ARG_OUTPUT, out.absolutePath])
+    }
+    return slider(args)
+  }
+
static SliderShell list(int result, Collection<String> commands =[]) {
slider(result, [ACTION_LIST] + commands )
}
@@ -608,11 +625,18 @@
String name,
String appTemplate,
String resourceTemplate,
- List<String> extraArgs=[]) {
+ List<String> extraArgs = [],
+ File launchReport = null) {
+
+ if (!launchReport) {
+ launchReport = createAppReportFile()
+ }
+
List<String> commands = [
ACTION_CREATE, name,
ARG_TEMPLATE, appTemplate,
ARG_RESOURCES, resourceTemplate,
+ ARG_OUTPUT, launchReport.absolutePath,
ARG_WAIT, Integer.toString(THAW_WAIT_TIME)
]
@@ -633,20 +657,35 @@
shell.execute()
if (!shell.execute()) {
// app has failed.
-
+
// grab the app report of the last known instance of this app
// which may not be there if it was a config failure; may be out of date
// from a previous run
- log.error("Launch failed with exit code ${shell.ret}.\nLast instance of $name:")
- slider([ACTION_LIST, name, ARG_VERBOSE]).dumpOutput()
-
- // trigger the assertion failure
- shell.assertExitCode(EXIT_SUCCESS)
+ log.error(
+ "Launch failed with exit code ${shell.ret}")
+ shell.dumpOutput()
+
+ // now grab that app report if it is there
+ def appReport = maybeLookupFromLaunchReport(launchReport)
+ String extraText = ""
+ if (appReport) {
+ log.error("Application report:\n$appReport")
+ extraText = appReport.diagnostics
+ }
+
+ fail("Application Launch Failure, exit code ${shell.ret}\n${extraText}")
}
-
return shell
}
+  /**
+   * Create a new temporary file to hold an application report.
+   * The file gets the prefix "launch" and the suffix ".json" and is
+   * created in the relative directory "target".
+   * @return the newly created file
+   */
+  public File createAppReportFile() {
+    File targetDir = new File("target")
+    return File.createTempFile("launch", ".json", targetDir)
+  }
+
/**
* If the option is not null/empty, add the command and the option
* @param args arg list being built up
@@ -662,7 +701,47 @@
}
return args
}
+
+  /**
+   * Load a serialized application report from a file if the file
+   * exists and is not empty.
+   * @param reportFile file to read
+   * @return the deserialized report, or null when the file is absent or empty
+   */
+  public SerializedApplicationReport maybeLoadAppReport(File reportFile) {
+    if (!reportFile.exists() || reportFile.length() <= 0) {
+      return null
+    }
+    return new ApplicationReportSerDeser().fromFile(reportFile)
+  }
+
+  /**
+   * Load the launch report and, if one was loaded, look up the
+   * application it names.
+   * @param launchReport file the launch report was written to
+   * @return the result of lookupApplication() for the report's application ID,
+   *         or null when no launch report could be loaded
+   */
+  public SerializedApplicationReport maybeLookupFromLaunchReport(File launchReport) {
+    def launched = maybeLoadAppReport(launchReport)
+    return launched ? lookupApplication(launched.applicationId) : null
+  }
+  /**
+   * Lookup an application, return null if loading failed.
+   * The lookup output goes to a temporary report file which is always
+   * deleted before this method returns.
+   * @param id application ID
+   * @return an application report, or null if the lookup command did not
+   *         exit with EXIT_SUCCESS or produced no report
+   */
+  public SerializedApplicationReport lookupApplication(String id) {
+    File reportFile = createAppReportFile()
+    try {
+      def shell = lookup(id, reportFile)
+      // ret is the command's exit code. Groovy truth treats a non-zero
+      // number as true, so a bare "if (shell.ret)" selects the failure
+      // case; compare against the success code explicitly.
+      if (shell.ret == EXIT_SUCCESS) {
+        return maybeLoadAppReport(reportFile)
+      }
+      log.warn("Lookup operation failed with exit code ${shell.ret}")
+      // dumpOutput() is used as a statement elsewhere in this class, so
+      // call it for its side effect rather than concatenating its result.
+      shell.dumpOutput()
+      return null
+    } finally {
+      reportFile.delete()
+    }
+  }
+
+
public Path buildClusterPath(String clustername) {
return new Path(
clusterFS.homeDirectory,
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
index 43ac477..31830d9 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
@@ -223,11 +223,11 @@
* if not the output is printed and an assertion is raised
* @param errorCode expected error code
+ * @param extra optional text appended to the exception message; defaults to ""
*/
- public void assertExitCode(int errorCode) {
+ public void assertExitCode(int errorCode, String extra="") {
if (this.ret != errorCode) {
dumpOutput()
throw new SliderException(ret,
- "Expected exit code of command ${command} : ${errorCode} - actual=${ret}")
+ "Expected exit code of command ${command} : ${errorCode} - actual=${ret} $extra")
}
}
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index a4eb1a2..3847e3f 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -50,9 +50,9 @@
}
cleanup(APPLICATION_NAME)
- def shell = createTemplatedSliderApplication( APPLICATION_NAME,
- APP_TEMPLATE2,
- APP_RESOURCE)
+ def shell = createTemplatedSliderApplication(APPLICATION_NAME,
+ APP_TEMPLATE2,
+ APP_RESOURCE)
logShell(shell)
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
new file mode 100644
index 0000000..ce1e0f1
--- /dev/null
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.funtest.lifecycle
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.registry.client.binding.RegistryUtils
+import org.apache.hadoop.registry.client.types.Endpoint
+import org.apache.hadoop.registry.client.types.ServiceRecord
+import org.apache.slider.api.InternalKeys
+import org.apache.slider.common.SliderExitCodes
+import org.apache.slider.common.SliderKeys
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.common.params.SliderActions
+import org.apache.slider.funtest.framework.AgentCommandTestBase
+import org.apache.slider.funtest.framework.FuntestProperties
+import org.apache.slider.funtest.framework.SliderShell
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+
+import static org.apache.slider.core.registry.info.CustomRegistryConstants.*
+
+/**
+ * Functional test which creates an application with the chaos monkey
+ * options set through internal arguments, then walks the instance
+ * through lookup, stop and destroy.
+ */
+@CompileStatic
+@Slf4j
+public class AgentLaunchFailureIT extends AgentCommandTestBase
+    implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
+
+  /** Name of the application instance used by this test. */
+  static String CLUSTER = "test-agent-launchfail"
+
+  /** Resource template passed to the create operation. */
+  static String APP_RESOURCE2 = "../slider-core/src/test/app_packages/test_command_log/resources_no_role.json"
+
+  @Before
+  public void prepareCluster() {
+    setupCluster(CLUSTER)
+  }
+
+  @After
+  public void destroyCluster() {
+    cleanup(CLUSTER)
+  }
+
+  @Test
+  public void testAgentLaunchFailure() throws Throwable {
+    describe("Create a failing cluster and validate failure logic")
+
+    // create an AM which fails to launch within a second:
+    // chaos monkey enabled, 1 second interval, AM failure probability 100
+    List<String> chaosArgs = [
+        ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_ENABLED, "true",
+        ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "1",
+        ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "100",
+    ]
+    File launchReportFile = createAppReportFile()
+    createTemplatedSliderApplication(
+        CLUSTER,
+        APP_TEMPLATE,
+        APP_RESOURCE2,
+        chaosArgs,
+        launchReportFile)
+
+    // look the application up from the saved launch report
+    maybeLookupFromLaunchReport(launchReportFile)
+    ensureApplicationIsUp(CLUSTER)
+
+    // stop
+    freeze(0, CLUSTER,
+        [
+            ARG_FORCE,
+            ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+            ARG_MESSAGE, "final-shutdown"
+        ])
+
+    destroy(0, CLUSTER)
+
+    // cluster now missing
+    exists(EXIT_UNKNOWN_INSTANCE, CLUSTER)
+  }
+}
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
index 50da8ae..16e65fa 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
@@ -49,9 +49,7 @@
@Before
public void prepareCluster() {
setupCluster(CLUSTER)
-
-
- }
+ }
@After
public void destroyCluster() {
@@ -59,7 +57,7 @@
}
@Test
- public void testAgentClusterLifecycle() throws Throwable {
+ public void testAgentRegistry() throws Throwable {
describe("Create a 0-role cluster and make registry queries against it")
// sanity check to verify the config is correct