SLIDER-570 handling of launch failures
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 7b50c60..7928642 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -30,12 +30,15 @@
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.slider.api.StatusKeys
 import org.apache.slider.common.tools.ConfigHelper
+import org.apache.slider.core.exceptions.SliderException
+import org.apache.slider.core.launch.SerializedApplicationReport
 import org.apache.slider.core.main.ServiceLauncher
 import org.apache.slider.common.SliderKeys
 import org.apache.slider.common.SliderXmlConfKeys
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.common.tools.SliderUtils
 import org.apache.slider.client.SliderClient
+import org.apache.slider.core.persist.ApplicationReportSerDeser
 import org.apache.slider.test.SliderTestUtils
 import org.junit.Before
 import org.junit.BeforeClass
@@ -369,6 +372,20 @@
     slider(cmd)
   }
 
+  /** Run the lookup action for application {@code id}, passing {@code result} to slider(); adds the output-file argument when {@code out} is set. */
+  static SliderShell lookup(int result, String id, File out) {
+    assert id
+    def outputArgs = out ? [ARG_OUTPUT, out.absolutePath] : []
+    return slider(result, [ACTION_LOOKUP, ARG_ID, id] + outputArgs)
+  }
+  
+  /** Run the lookup action for application {@code id} via slider(commands); adds the output-file argument when {@code out} is set. */
+  static SliderShell lookup(String id, File out) {
+    assert id
+    def outputArgs = out ? [ARG_OUTPUT, out.absolutePath] : []
+    return slider([ACTION_LOOKUP, ARG_ID, id] + outputArgs)
+  }
+
   static SliderShell list(int result, Collection<String> commands =[]) {
     slider(result, [ACTION_LIST] + commands )
   }
@@ -608,11 +625,18 @@
       String name,
       String appTemplate,
       String resourceTemplate,
-      List<String> extraArgs=[]) {
+      List<String> extraArgs = [],
+      File launchReport = null) {
+
+    if (!launchReport) {
+      launchReport = createAppReportFile()
+    }
+    
     List<String> commands = [
         ACTION_CREATE, name,
         ARG_TEMPLATE, appTemplate,
         ARG_RESOURCES, resourceTemplate,
+        ARG_OUTPUT, launchReport.absolutePath,
         ARG_WAIT, Integer.toString(THAW_WAIT_TIME)
     ]
 
@@ -633,20 +657,35 @@
     shell.execute()
     if (!shell.execute()) {
       // app has failed.
-      
+
       // grab the app report of the last known instance of this app
       // which may not be there if it was a config failure; may be out of date
       // from a previous run
-      log.error("Launch failed with exit code ${shell.ret}.\nLast instance of $name:")
-      slider([ACTION_LIST, name, ARG_VERBOSE]).dumpOutput()
-      
-      // trigger the assertion failure
-      shell.assertExitCode(EXIT_SUCCESS)
+      log.error(
+          "Launch failed with exit code ${shell.ret}")
+      shell.dumpOutput()
+
+      // now grab that app report if it is there
+      def appReport = maybeLookupFromLaunchReport(launchReport)
+      String extraText = ""
+      if (appReport) {
+        log.error("Application report:\n$appReport")
+        extraText = appReport.diagnostics
+      }
+
+      fail("Application Launch Failure, exit code  ${shell.ret}\n${extraText}")
     }
-    
     return shell
   }
 
+  /**
+   * Create a new temporary file named launch*.json in the relative directory "target".
+   * @return the newly created file
+   */
+  public File createAppReportFile() {
+    return File.createTempFile("launch", ".json", new File("target"))
+  }
+
   /**
    * If the option is not null/empty, add the command and the option
    * @param args arg list being built up
@@ -662,7 +701,47 @@
     }
     return args
   }
+  
+  /**
+   * Load a serialized application report from a file, if the file exists and is non-empty.
+   * @return the deserialized report, or null when the file is missing or empty
+   */
+  public SerializedApplicationReport maybeLoadAppReport(File reportFile) {
+    boolean hasContent = reportFile.exists() && reportFile.length() > 0
+    return hasContent ? new ApplicationReportSerDeser().fromFile(reportFile) : null
+  }
+  
+  /**
+   * Load the launch report and, if one was loaded, look up the application it names.
+   * @return the result of lookupApplication() for the report's application ID, or null if no report was loaded
+   */
+  public SerializedApplicationReport maybeLookupFromLaunchReport(File launchReport) {
+    def launched = maybeLoadAppReport(launchReport)
+    return launched ? lookupApplication(launched.applicationId) : null
+  }
 
+  /**
+   * Lookup an application; return null if the lookup command failed or no report could be loaded
+   * @param id application ID
+   * @return an application report or null
+   */
+  public SerializedApplicationReport lookupApplication(String id) {
+    File reportFile = createAppReportFile();
+    try {
+      def shell = lookup(id, reportFile)
+      if (shell.ret == 0) {  // ret is the exit code: 0 is success, so a bare "if (shell.ret)" is inverted
+        return maybeLoadAppReport(reportFile)
+      } else {
+        log.warn("Lookup operation failed with exit code ${shell.ret}")
+        shell.dumpOutput()
+        return null
+      }
+    } finally {
+      reportFile.delete()
+    }
+  }
+
+  
   public Path buildClusterPath(String clustername) {
     return new Path(
         clusterFS.homeDirectory,
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
index 43ac477..31830d9 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy
@@ -223,11 +223,11 @@
    * if not the output is printed and an assertion is raised
    * @param errorCode expected error code
    */
-  public void assertExitCode(int errorCode) {
+  public void assertExitCode(int errorCode, String extra="") {
     if (this.ret != errorCode) {
       dumpOutput()
       throw new SliderException(ret,
-          "Expected exit code of command ${command} : ${errorCode} - actual=${ret}")
+          "Expected exit code of command ${command} : ${errorCode} - actual=${ret} $extra")
     }
   }
 
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index a4eb1a2..3847e3f 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -50,9 +50,9 @@
     }
 
     cleanup(APPLICATION_NAME)
-    def shell = createTemplatedSliderApplication( APPLICATION_NAME,
-            APP_TEMPLATE2,
-            APP_RESOURCE)
+    def shell = createTemplatedSliderApplication(APPLICATION_NAME,
+        APP_TEMPLATE2,
+        APP_RESOURCE)
 
     logShell(shell)
 
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
new file mode 100644
index 0000000..ce1e0f1
--- /dev/null
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.funtest.lifecycle
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.registry.client.binding.RegistryUtils
+import org.apache.hadoop.registry.client.types.Endpoint
+import org.apache.hadoop.registry.client.types.ServiceRecord
+import org.apache.slider.api.InternalKeys
+import org.apache.slider.common.SliderExitCodes
+import org.apache.slider.common.SliderKeys
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.common.params.SliderActions
+import org.apache.slider.funtest.framework.AgentCommandTestBase
+import org.apache.slider.funtest.framework.FuntestProperties
+import org.apache.slider.funtest.framework.SliderShell
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+
+import static org.apache.slider.core.registry.info.CustomRegistryConstants.*
+
+@CompileStatic
+@Slf4j
+public class AgentLaunchFailureIT extends AgentCommandTestBase
+    implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
+  // Functional test: creates an application with the chaos monkey configured for AM failure,
+  // then runs the lookup, freeze, destroy and exists steps against it.
+  static String CLUSTER = "test-agent-launchfail"
+  // resources template handed to createTemplatedSliderApplication() in the test below
+  static String APP_RESOURCE2 = "../slider-core/src/test/app_packages/test_command_log/resources_no_role.json"
+
+  // per-test setup: calls setupCluster() with the cluster name used by this test
+  @Before
+  public void prepareCluster() {
+    setupCluster(CLUSTER)
+ }
+
+  @After
+  public void destroyCluster() {
+    cleanup(CLUSTER)
+  }
+
+  @Test
+  public void testAgentLaunchFailure() throws Throwable {
+    describe("Create a failing cluster and validate failure logic")
+
+    // create an AM which fails to launch within a second
+    File launchReportFile = createAppReportFile();
+    SliderShell shell = createTemplatedSliderApplication(CLUSTER,
+        APP_TEMPLATE,
+        APP_RESOURCE2,
+        [
+            ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_ENABLED, "true",
+            ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "1",
+            ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "100",
+        ],
+        launchReportFile)
+    // NOTE(review): AM failure probability is 100 above, yet the steps below expect a live app -- confirm intended flow
+    maybeLookupFromLaunchReport(launchReportFile)
+    ensureApplicationIsUp(CLUSTER)
+
+
+    //stop
+    freeze(0, CLUSTER,
+        [
+            ARG_FORCE,
+            ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+            ARG_MESSAGE, "final-shutdown"
+        ])
+
+    destroy(0, CLUSTER)
+
+    //cluster now missing
+    exists(EXIT_UNKNOWN_INSTANCE, CLUSTER)
+
+  }
+}
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
index 50da8ae..16e65fa 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
@@ -49,9 +49,7 @@
   @Before
   public void prepareCluster() {
     setupCluster(CLUSTER)
-
-
-  }
+ }
 
   @After
   public void destroyCluster() {
@@ -59,7 +57,7 @@
   }
 
   @Test
-  public void testAgentClusterLifecycle() throws Throwable {
+  public void testAgentRegistry() throws Throwable {
     describe("Create a 0-role cluster and make registry queries against it")
 
     // sanity check to verify the config is correct