Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-slider
diff --git a/app-packages/accumulo/appConfig.json b/app-packages/accumulo/appConfig.json
index 8fe9a18..6b7033e 100644
--- a/app-packages/accumulo/appConfig.json
+++ b/app-packages/accumulo/appConfig.json
@@ -8,7 +8,7 @@
     "java_home": "/usr/jdk64/jdk1.7.0_45",
     "package_list": "files/accumulo-${accumulo.version}-bin.tar.gz",
     "site.global.app_user": "yarn",
-    "site.global.app_log_dir": "${AGENT_LOG_ROOT}/app/log",
+    "site.global.app_log_dir": "${AGENT_LOG_ROOT}",
     "site.global.app_pid_dir": "${AGENT_WORK_ROOT}/app/run",
     "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/accumulo-${accumulo.version}",
     "site.global.app_install_dir": "${AGENT_WORK_ROOT}/app/install",
diff --git a/app-packages/accumulo/metainfo.xml b/app-packages/accumulo/metainfo.xml
index 4cf6c79..b1aa9de 100644
--- a/app-packages/accumulo/metainfo.xml
+++ b/app-packages/accumulo/metainfo.xml
@@ -40,7 +40,7 @@
             </value>
           </export>
           <export>
-            <name>org.apache.slider.jmx</name>
+            <name>app.jmx</name>
             <value>
               ${site.global.monitor_protocol}://${ACCUMULO_MONITOR_HOST}:${site.accumulo-site.monitor.port.client}/xml
             </value>
@@ -85,6 +85,7 @@
         <name>ACCUMULO_MONITOR</name>
         <category>MASTER</category>
         <publishConfig>true</publishConfig>
+        <appExports>QuickLinks-app.jmx,QuickLinks-org.apache.slider.monitor</appExports>
         <commandScript>
           <script>scripts/accumulo_monitor.py</script>
           <scriptType>PYTHON</scriptType>
diff --git a/app-packages/accumulo/pom.xml b/app-packages/accumulo/pom.xml
index 45dfd87..bcf97e9 100644
--- a/app-packages/accumulo/pom.xml
+++ b/app-packages/accumulo/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
@@ -81,6 +81,7 @@
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-failsafe-plugin</artifactId>
+            <version>${maven-failsafe-plugin.version}</version>
             <executions>
               <execution>
                 <id>run-integration-tests</id>
@@ -122,14 +123,6 @@
     <plugins>
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
@@ -143,15 +136,6 @@
           </dependency>
         </dependencies>
       </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <!-- can't figure out how to get the surefire plugin not to pick up the ITs, so skip it entirely -->
-          <skip>true</skip>
-        </configuration>
-      </plugin>
     </plugins>
   </build>
 
@@ -185,6 +169,12 @@
     </dependency>
     <dependency>
       <groupId>org.apache.slider</groupId>
+      <artifactId>slider-core</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.slider</groupId>
       <artifactId>slider-funtest</artifactId>
       <scope>test</scope>
     </dependency>
@@ -193,6 +183,11 @@
       <artifactId>groovy-all</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
 </project>
diff --git a/app-packages/app-pkg-template/README.txt b/app-packages/app-pkg-template/README.txt
new file mode 100644
index 0000000..00dfdbc
--- /dev/null
+++ b/app-packages/app-pkg-template/README.txt
@@ -0,0 +1,34 @@
+<!---
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+How to create a Slider app package for myapp?
+
+To create the app package you will need the application tarball copied to a specific location.
+
+E.g.
+  cp ~/Downloads/myapp-1.0.0.tar package/files/
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/myapp/)
+  zip -r myapp-1.0.0.zip .
+
+Verify the content using  
+  zip -Tv myapp-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So its advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a one-node Yarn cluster.
diff --git a/app-packages/app-pkg-template/appConfig.json b/app-packages/app-pkg-template/appConfig.json
new file mode 100644
index 0000000..a6f61f9
--- /dev/null
+++ b/app-packages/app-pkg-template/appConfig.json
@@ -0,0 +1,21 @@
+{
+  "schema": "http://example.org/specification/v2.0.0",
+  "metadata": {
+  },
+  "global": {
+    "application.def": "package/myapp-1.0.0.zip",
+    "java_home": "/usr/jdk64/jdk1.7.0_45",
+
+    "site.global.app_user": "yarn",
+    "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/myapp-1.0.0",
+
+    "site.global.listen_port": "${MYAPP_COMPONENT.ALLOCATED_PORT}"
+  },
+  "components": {
+    "slider-appmaster": {
+      "jvm.heapsize": "256M"
+    },
+    "MYAPP_COMPONENT": {
+    }
+  }
+}
diff --git a/app-packages/app-pkg-template/metainfo.xml b/app-packages/app-pkg-template/metainfo.xml
new file mode 100644
index 0000000..c6e1485
--- /dev/null
+++ b/app-packages/app-pkg-template/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<metainfo>
+  <schemaVersion>2.0</schemaVersion>
+  <application>
+    <name>MyApp</name>
+    <comment>MyApp is an app that can run on YARN.</comment>
+    <version>1.0.0</version>
+    <exportedConfigs>None</exportedConfigs>
+
+    <components>
+      <component>
+        <name>MYAPP_COMPONENT</name>
+        <category>MASTER</category>
+        <exports>
+          <export>
+            <name>host_port</name>
+            <value>${THIS_HOST}:${site.global.listen_port}</value>
+          </export>
+        </exports>
+        <commandScript>
+          <script>scripts/myapp_component.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+    </components>
+
+    <osSpecifics>
+      <osSpecific>
+        <osType>any</osType>
+        <packages>
+          <package>
+            <type>tarball</type>
+            <name>files/myapp-1.0.0.tar</name>
+          </package>
+        </packages>
+      </osSpecific>
+    </osSpecifics>
+
+  </application>
+</metainfo>
diff --git a/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE b/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE
new file mode 100644
index 0000000..2114587
--- /dev/null
+++ b/app-packages/app-pkg-template/package/files/myapp-1.0.0.tar.REPLACE
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace this file with a tarball of the myapp version you want to package.
diff --git a/app-packages/app-pkg-template/package/scripts/myapp_master.py b/app-packages/app-pkg-template/package/scripts/myapp_master.py
new file mode 100644
index 0000000..e6bc867
--- /dev/null
+++ b/app-packages/app-pkg-template/package/scripts/myapp_master.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class MyApp_Component(Script):
+  def install(self, env):
+    self.install_packages(env)
+
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env)
+
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+  def status(self, env):
+    import params
+    env.set_params(params)
+
+if __name__ == "__main__":
+  MyApp_Component().execute()
diff --git a/app-packages/app-pkg-template/package/scripts/params.py b/app-packages/app-pkg-template/package/scripts/params.py
new file mode 100644
index 0000000..e81bda0
--- /dev/null
+++ b/app-packages/app-pkg-template/package/scripts/params.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+port = config['configurations']['global']['listen_port']
+
diff --git a/app-packages/app-pkg-template/resources.json b/app-packages/app-pkg-template/resources.json
new file mode 100644
index 0000000..0e95879
--- /dev/null
+++ b/app-packages/app-pkg-template/resources.json
@@ -0,0 +1,16 @@
+{
+  "schema" : "http://example.org/specification/v2.0.0",
+  "metadata" : {
+  },
+  "global" : {
+  },
+  "components": {
+    "slider-appmaster": {
+    },
+    "MYAPP_COMPONENT": {
+      "yarn.role.priority": "1",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
+    }
+  }
+}
\ No newline at end of file
diff --git a/app-packages/command-logger/application-pkg/pom.xml b/app-packages/command-logger/application-pkg/pom.xml
index 53f7fd2..18c5435 100644
--- a/app-packages/command-logger/application-pkg/pom.xml
+++ b/app-packages/command-logger/application-pkg/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
@@ -55,11 +55,6 @@
       </plugin>
 
       <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.0</version>
-      </plugin>
-
-      <plugin>
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
         <version>${apache-rat-plugin.version}</version>
diff --git a/app-packages/command-logger/slider-pkg/pom.xml b/app-packages/command-logger/slider-pkg/pom.xml
index 0971868..2bb19b8 100644
--- a/app-packages/command-logger/slider-pkg/pom.xml
+++ b/app-packages/command-logger/slider-pkg/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/app-packages/hbase/README.txt b/app-packages/hbase/README.txt
index b4e4ccd..1d5c4bb 100644
--- a/app-packages/hbase/README.txt
+++ b/app-packages/hbase/README.txt
@@ -28,14 +28,14 @@
 ****** OPTION - I (use mvn command) **
 You need the HBase version available on local maven repo to create the Slider App Package for HBase.
 
-The version of HBase used for the app package can be adjusted by adding a
-flag such as
-  -Dhbase.version=0.98.3
-
 Download the tarball for HBase:
   e.g. path to tarball ~/Downloads/hbase-0.98.3-hadoop2-bin.tar.gz
 
-Use the following command to install HBase tarball locally:
+The version of HBase used for the app package can be adjusted by adding a
+flag such as
+  -Dhbase.version=0.98.3-hadoop2
+
+Use the following command to install HBase tarball locally (under local workspace of HBase repo):
   mvn install:install-file -Dfile=<path-to-tarball> -DgroupId=org.apache.hbase -DartifactId=hbase -Dversion=0.98.3-hadoop2 -Dclassifier=bin -Dpackaging=tar.gz
 
 You may need to copy the hbase tarball to the following location if the above step doesn't publish the tarball:
diff --git a/app-packages/hbase/appConfig.json b/app-packages/hbase/appConfig.json
index 20cd436..d00ae6d 100644
--- a/app-packages/hbase/appConfig.json
+++ b/app-packages/hbase/appConfig.json
@@ -9,7 +9,7 @@
     "java_home": "/usr/jdk64/jdk1.7.0_45",
     "package_list": "files/hbase-${hbase.version}-bin.tar.gz",
     "site.global.app_user": "yarn",
-    "site.global.app_log_dir": "${AGENT_LOG_ROOT}/app/log",
+    "site.global.app_log_dir": "${AGENT_LOG_ROOT}",
     "site.global.app_pid_dir": "${AGENT_WORK_ROOT}/app/run",
     "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/hbase-${hbase.version}",
     "site.global.app_install_dir": "${AGENT_WORK_ROOT}/app/install",
@@ -23,6 +23,9 @@
     "site.global.ganglia_server_host": "${NN_HOST}",
     "site.global.ganglia_server_port": "8667",
     "site.global.ganglia_server_id": "Application1",
+    "site.global.hbase_thrift_port": "${HBASE_THRIFT.ALLOCATED_PORT}",
+    "site.global.hbase_thrift2_port": "${HBASE_THRIFT2.ALLOCATED_PORT}",
+    "site.global.hbase_rest_port": "${HBASE_REST.ALLOCATED_PORT}",
     "site.hbase-site.hbase.hstore.flush.retries.number": "120",
     "site.hbase-site.hbase.client.keyvalue.maxsize": "10485760",
     "site.hbase-site.hbase.hstore.compactionThreshold": "3",
diff --git a/app-packages/hbase/get-hbase-site.py b/app-packages/hbase/get-hbase-site.py
new file mode 100644
index 0000000..9760781
--- /dev/null
+++ b/app-packages/hbase/get-hbase-site.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""Gets hbase-site.xml from running HBase instance
+First argument is the name of cluster instance
+"""
+
+import sys
+import urllib2
+import subprocess
+
+f=subprocess.Popen("slider status "+sys.argv[1], shell=True, stdout=subprocess.PIPE).stdout
+for line in f:
+  pos = line.find("info.am.web.url")
+  if pos > 0 :
+    part = line[(pos+20) :]
+    endPos = part.find("\"")
+    url = part[: (endPos-1)]
+    url = url + "/ws/v1/slider/publisher/slider/hbase-site.xml"
+    print url
+    response = urllib2.urlopen(url)
+    html = response.read()
+
+    fout=open("hbase-site.xml", "w")
+    fout.write(html)
+    fout.close()
+    f.close()
+
+    sys.exit(0)
+
+print "info.am.web.url key was not found for " + sys.argv[1]
+sys.exit(1)
diff --git a/app-packages/hbase/get-hbase-site.sh b/app-packages/hbase/get-hbase-site.sh
index 0edac30..5211d83 100755
--- a/app-packages/hbase/get-hbase-site.sh
+++ b/app-packages/hbase/get-hbase-site.sh
@@ -1,24 +1,17 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 tuple=`slider status $1 | grep "info.am.web.url"`
+echo $tuple
 FS=":"
 url=`echo $tuple | awk '{split($0,array,": ")} END{print array[2]}'`
 url="${url%,}"
 url="${url%\"}"
 url="${url#\"}"
-url="${url}ws/v1/slider/publisher/slider/hbase-site.xml"
-curl -k -o hbase-site.xml $url
+siteurl="${url}ws/v1/slider/publisher/slider/hbase-site.xml"
+curl -k -o hbase-site.dnld $siteurl
+grep -v 'hbase.tmp.dir' hbase-site.dnld > hbase-site.xml
+
+linksurl="${url}ws/v1/slider/publisher/slider/quicklinks"
+curl -k -o links.json $linksurl
+python $DIR/links.py
+#| sed -e 's/\/\///g' | awk 'BEGIN { FS = ":" } ; { print $2 }'
diff --git a/app-packages/hbase/links.py b/app-packages/hbase/links.py
new file mode 100644
index 0000000..19b0d91
--- /dev/null
+++ b/app-packages/hbase/links.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from __future__ import print_function
+import logging
+import json
+
+file = open("links.json")
+links = json.load(file)
+file.close()
+if links.has_key("entries"):
+  entries = links["entries"]
+  if entries.has_key("org.apache.slider.hbase.rest"):
+    print("org.apache.slider.hbase.rest : %s" % entries["org.apache.slider.hbase.rest"])
+  if entries.has_key("org.apache.slider.hbase.thrift"):
+    print("org.apache.slider.hbase.thrift : %s" % entries["org.apache.slider.hbase.thrift"])
+  if entries.has_key("org.apache.slider.hbase.thrift2"):
+    print("org.apache.slider.hbase.thrift2 : %s" % entries["org.apache.slider.hbase.thrift2"])
diff --git a/app-packages/hbase/metainfo.xml b/app-packages/hbase/metainfo.xml
index 99413f6..aae048d 100644
--- a/app-packages/hbase/metainfo.xml
+++ b/app-packages/hbase/metainfo.xml
@@ -41,11 +41,23 @@
             <value>http://${HBASE_MASTER_HOST}:${site.hbase-site.hbase.master.info.port}/master-status</value>
           </export>
           <export>
-            <name>org.apache.slider.metrics</name>
+            <name>org.apache.slider.hbase.rest</name>
+            <value>http://${HBASE_REST_HOST}:${site.global.hbase_rest_port}</value>
+          </export>
+          <export>
+            <name>org.apache.slider.hbase.thrift2</name>
+            <value>http://${HBASE_THRIFT2_HOST}:${site.global.hbase_thrift2_port}</value>
+          </export>
+          <export>
+            <name>org.apache.slider.hbase.thrift</name>
+            <value>http://${HBASE_THRIFT_HOST}:${site.global.hbase_thrift_port}</value>
+          </export>
+          <export>
+            <name>app.metrics</name>
             <value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
           </export>
           <export>
-            <name>org.apache.slider.ganglia</name>
+            <name>app.ganglia</name>
             <value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
           </export>
         </exports>
@@ -56,6 +68,12 @@
         <command>HBASE_REGIONSERVER-START</command>
         <requires>HBASE_MASTER-STARTED</requires>
       </commandOrder>
+      <commandOrder>
+        <command>HBASE_MASTER-START</command>
+        <requires>HBASE_REST-INSTALLED</requires>
+        <requires>HBASE_THRIFT-INSTALLED</requires>
+        <requires>HBASE_THRIFT2-INSTALLED</requires>
+      </commandOrder>
     </commandOrders>
     <components>
       <component>
@@ -63,6 +81,17 @@
         <category>MASTER</category>
         <minInstanceCount>1</minInstanceCount>
         <maxInstanceCount>2</maxInstanceCount>
+        <appExports>QuickLinks-org.apache.slider.jmx,QuickLinks-org.apache.slider.monitor,QuickLinks-app.metrics,QuickLinks-app.ganglia</appExports>
+        <componentExports>
+          <componentExport>
+            <name>app.jmx</name>
+            <value>${THIS_HOST}:${site.hbase-site.hbase.master.info.port}/jmx</value>
+          </componentExport>
+          <componentExport>
+            <name>app.monitor</name>
+            <value>${THIS_HOST}:${site.hbase-site.hbase.master.info.port}/master-status</value>
+          </componentExport>
+        </componentExports>
         <commandScript>
           <script>scripts/hbase_master.py</script>
           <scriptType>PYTHON</scriptType>
@@ -81,6 +110,39 @@
       </component>
 
       <component>
+        <name>HBASE_REST</name>
+        <category>MASTER</category>
+        <minInstanceCount>0</minInstanceCount>
+        <appExports>QuickLinks-org.apache.slider.hbase.rest</appExports>
+        <commandScript>
+          <script>scripts/hbase_rest.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+
+      <component>
+        <name>HBASE_THRIFT</name>
+        <category>MASTER</category>
+        <minInstanceCount>0</minInstanceCount>
+        <appExports>QuickLinks-org.apache.slider.hbase.thrift</appExports>
+        <commandScript>
+          <script>scripts/hbase_thrift.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+
+      <component>
+        <name>HBASE_THRIFT2</name>
+        <category>MASTER</category>
+        <minInstanceCount>0</minInstanceCount>
+        <appExports>QuickLinks-org.apache.slider.hbase.thrift2</appExports>
+        <commandScript>
+          <script>scripts/hbase_thrift2.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+
+      <component>
         <name>HBASE_CLIENT</name>
         <category>CLIENT</category>
         <minInstanceCount>0</minInstanceCount>
diff --git a/app-packages/hbase/package/scripts/hbase_regionserver.py b/app-packages/hbase/package/scripts/hbase_regionserver.py
index 8d66dcc..daa5732 100644
--- a/app-packages/hbase/package/scripts/hbase_regionserver.py
+++ b/app-packages/hbase/package/scripts/hbase_regionserver.py
@@ -58,9 +58,6 @@
     pid_file = format("{pid_dir}/hbase-{hbase_user}-regionserver.pid")
     check_process_status(pid_file)
     
-  def decommission(self, env):
-    print "Decommission not yet implemented!"
-    
 
 if __name__ == "__main__":
   HbaseRegionServer().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_rest.py b/app-packages/hbase/package/scripts/hbase_rest.py
new file mode 100644
index 0000000..36b51f9
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_rest.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+         
+class HbaseRest(Script):
+  def install(self, env):
+    self.install_packages(env)
+    
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+    hbase(name='rest')
+      
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env) # for security
+
+    hbase_service( 'rest',
+      action = 'start'
+    )
+    
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+    hbase_service( 'rest',
+      action = 'stop'
+    )
+
+  def status(self, env):
+    import status_params
+    env.set_params(status_params)
+    pid_file = format("{pid_dir}/hbase-{hbase_user}-rest.pid")
+    check_process_status(pid_file)
+    
+if __name__ == "__main__":
+  HbaseRest().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_service.py b/app-packages/hbase/package/scripts/hbase_service.py
index 2b30083..96add84 100644
--- a/app-packages/hbase/package/scripts/hbase_service.py
+++ b/app-packages/hbase/package/scripts/hbase_service.py
@@ -35,6 +35,12 @@
     
     if action == 'start':
       daemon_cmd = format("{cmd} start {role}")
+      if name == 'rest':
+        daemon_cmd = format("{daemon_cmd} -p {rest_port}")
+      elif name == 'thrift':
+        daemon_cmd = format("{daemon_cmd} -p {thrift_port}")
+      elif name == 'thrift2':
+        daemon_cmd = format("{daemon_cmd} -p {thrift2_port}")
       no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")
     elif action == 'stop':
       daemon_cmd = format("{cmd} stop {role} && rm -f {pid_file}")
diff --git a/app-packages/hbase/package/scripts/hbase_thrift.py b/app-packages/hbase/package/scripts/hbase_thrift.py
new file mode 100644
index 0000000..84bfc62
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_thrift.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+         
+class HbaseThrift(Script):
+  def install(self, env):
+    self.install_packages(env)
+    
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+    hbase(name='thrift')
+      
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env) # for security
+
+    hbase_service( 'thrift',
+      action = 'start'
+    )
+    
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+    hbase_service( 'thrift',
+      action = 'stop'
+    )
+
+  def status(self, env):
+    import status_params
+    env.set_params(status_params)
+    pid_file = format("{pid_dir}/hbase-{hbase_user}-thrift.pid")
+    check_process_status(pid_file)
+    
+if __name__ == "__main__":
+  HbaseThrift().execute()
diff --git a/app-packages/hbase/package/scripts/hbase_thrift2.py b/app-packages/hbase/package/scripts/hbase_thrift2.py
new file mode 100644
index 0000000..b72196c
--- /dev/null
+++ b/app-packages/hbase/package/scripts/hbase_thrift2.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+from hbase import hbase
+from hbase_service import hbase_service
+
+         
+class HbaseThrift2(Script):
+  def install(self, env):
+    self.install_packages(env)
+    
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+    hbase(name='thrift2')
+      
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env) # for security
+
+    hbase_service( 'thrift2',
+      action = 'start'
+    )
+    
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+    hbase_service( 'thrift2',
+      action = 'stop'
+    )
+
+  def status(self, env):
+    import status_params
+    env.set_params(status_params)
+    pid_file = format("{pid_dir}/hbase-{hbase_user}-thrift2.pid")
+    check_process_status(pid_file)
+    
+if __name__ == "__main__":
+  HbaseThrift2().execute()
diff --git a/app-packages/hbase/package/scripts/params.py b/app-packages/hbase/package/scripts/params.py
index 0d8b04b..1f25f68 100644
--- a/app-packages/hbase/package/scripts/params.py
+++ b/app-packages/hbase/package/scripts/params.py
@@ -57,6 +57,10 @@
 ganglia_server_host = default('/configurations/global/ganglia_server_host', '')
 ganglia_server_port = default('/configurations/global/ganglia_server_port', '8663')
 
+rest_port = config['configurations']['global']['hbase_rest_port']
+thrift_port = config['configurations']['global']['hbase_thrift_port']
+thrift2_port = config['configurations']['global']['hbase_thrift2_port']
+
 if security_enabled:
   
   _use_hostname_in_principal = default('instance_name', True)
diff --git a/app-packages/hbase/pom.xml b/app-packages/hbase/pom.xml
index 3854496..7dede6c 100644
--- a/app-packages/hbase/pom.xml
+++ b/app-packages/hbase/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.31.0-incubating-SNAPSHOT</version>
+    <version>0.41.0-incubating-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
@@ -121,14 +121,6 @@
     <plugins>
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
diff --git a/app-packages/hbase/resources.json b/app-packages/hbase/resources.json
index e0ff26f..d2fdbd8 100644
--- a/app-packages/hbase/resources.json
+++ b/app-packages/hbase/resources.json
@@ -16,6 +16,21 @@
       "yarn.role.priority": "2",
       "yarn.component.instances": "1",
       "yarn.memory": "256"
+    },
+    "HBASE_REST": {
+      "yarn.role.priority": "3",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
+    },
+    "HBASE_THRIFT": {
+      "yarn.role.priority": "4",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
+    },
+    "HBASE_THRIFT2": {
+      "yarn.role.priority": "5",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
     }
   }
 }
diff --git a/app-packages/memcached-win/README.txt b/app-packages/memcached-win/README.txt
new file mode 100644
index 0000000..4d93b91
--- /dev/null
+++ b/app-packages/memcached-win/README.txt
@@ -0,0 +1,36 @@
+<!---
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+How to create a Slider app package for Memcached for Windows?
+
+To create the app package you will need the Memcached tarball copied to a specific location.
+
+Replace the placeholder jar files for JMemcached.
+  cp ~/Downloads/jmemcached-cli-1.0.0.jar package/files/jmemcached-1.0.0/
+  cp ~/Downloads/jmemcached-core-1.0.0.jar package/files/jmemcached-1.0.0/
+  rm package/files/jmemcached-1.0.0/*.REPLACEME
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/memcached/)
+  zip -r jmemcached-1.0.0.zip .
+
+Verify the content using  
+  unzip -l "$@" jmemcached-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So its advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a minimal Yarn cluster.
diff --git a/app-packages/memcached-win/appConfig.json b/app-packages/memcached-win/appConfig.json
new file mode 100644
index 0000000..b76ecde
--- /dev/null
+++ b/app-packages/memcached-win/appConfig.json
@@ -0,0 +1,26 @@
+{
+  "schema": "http://example.org/specification/v2.0.0",
+  "metadata": {
+  },
+  "global": {
+    "application.def": "/slider/jmemcached-1.0.0.zip",
+    "java_home": "C:\\java",
+
+    "site.global.app_user": "hadoop",
+    "site.global.app_root": "${AGENT_WORK_ROOT}\\app\\install",
+    "site.global.pid_file": "${AGENT_WORK_ROOT}\\app\\run\\component.pid",
+    "site.global.additional_cp": "C:\\hdp\\hadoop-2.4.0.2.1.3.0-1990\\share\\hadoop\\common\\lib\\*",
+    "site.global.xmx_val": "256m",
+    "site.global.xms_val": "128m",
+    "site.global.memory_val": "200M",
+    "site.global.listen_port": "${MEMCACHED.ALLOCATED_PORT}{DO_NOT_PROPAGATE}"
+
+  },
+  "components": {
+    "slider-appmaster": {
+      "jvm.heapsize": "256M"
+    },
+    "MEMCACHED": {
+    }
+  }
+}
diff --git a/app-packages/memcached-win/metainfo.xml b/app-packages/memcached-win/metainfo.xml
new file mode 100644
index 0000000..d056c0a
--- /dev/null
+++ b/app-packages/memcached-win/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<metainfo>
+  <schemaVersion>2.0</schemaVersion>
+  <application>
+    <name>MEMCACHED</name>
+    <comment>Memcache is a network accessible key/value storage system, often used as a distributed cache.</comment>
+    <version>1.0.0</version>
+    <exportedConfigs>None</exportedConfigs>
+
+    <components>
+      <component>
+        <name>MEMCACHED</name>
+        <category>MASTER</category>
+        <exports>
+          <export>
+            <name>host_port</name>
+            <value>${THIS_HOST}:${site.global.listen_port}</value>
+          </export>
+        </exports>
+        <commandScript>
+          <script>scripts/memcached.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+    </components>
+
+    <osSpecifics>
+      <osSpecific>
+        <osType>any</osType>
+        <packages>
+          <package>
+            <type>folder</type>
+            <name>files\\jmemcached-1.0.0</name>
+          </package>
+        </packages>
+      </osSpecific>
+    </osSpecifics>
+
+  </application>
+</metainfo>
diff --git a/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME
new file mode 100644
index 0000000..6855ef9
--- /dev/null
+++ b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-cli-1.0.0.jar.REPLACEME
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace with the real jar.
\ No newline at end of file
diff --git a/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME
new file mode 100644
index 0000000..6855ef9
--- /dev/null
+++ b/app-packages/memcached-win/package/files/jmemcached-1.0.0/jmemcached-core-1.0.0.jar.REPLACEME
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace with the real jar.
\ No newline at end of file
diff --git a/app-packages/memcached-win/package/scripts/memcached.py b/app-packages/memcached-win/package/scripts/memcached.py
new file mode 100644
index 0000000..bc9905d
--- /dev/null
+++ b/app-packages/memcached-win/package/scripts/memcached.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class Memcached(Script):
+  def install(self, env):
+    self.install_packages(env)
+    pass
+
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env)
+    process_cmd = format("{java64_home}\\bin\\java -Xmx{xmx_val} -Xms{xms_val} -classpath {app_root}\\*;{additional_cp} com.thimbleware.jmemcached.Main --memory={memory_val} --port={port}")
+
+    Execute(process_cmd,
+        user=params.app_user,
+        logoutput=False,
+        wait_for_finish=False,
+        pid_file=params.pid_file
+    )
+
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+  def status(self, env):
+    import params
+    env.set_params(params)
+    #Check process status need to be changed for Windows
+    #check_process_status(params.pid_file)
+
+if __name__ == "__main__":
+  Memcached().execute()
diff --git a/app-packages/memcached-win/package/scripts/params.py b/app-packages/memcached-win/package/scripts/params.py
new file mode 100644
index 0000000..fab3714
--- /dev/null
+++ b/app-packages/memcached-win/package/scripts/params.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+pid_file = config['configurations']['global']['pid_file']
+additional_cp = config['configurations']['global']['additional_cp']
+xmx_val = config['configurations']['global']['xmx_val']
+xms_val = config['configurations']['global']['xms_val']
+memory_val = config['configurations']['global']['memory_val']
+port = config['configurations']['global']['listen_port']
diff --git a/app-packages/memcached-win/resources.json b/app-packages/memcached-win/resources.json
new file mode 100644
index 0000000..f0e02ac
--- /dev/null
+++ b/app-packages/memcached-win/resources.json
@@ -0,0 +1,16 @@
+{
+  "schema" : "http://example.org/specification/v2.0.0",
+  "metadata" : {
+  },
+  "global" : {
+  },
+  "components": {
+    "slider-appmaster": {
+    },
+    "MEMCACHED": {
+      "yarn.role.priority": "1",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
+    }
+  }
+}
\ No newline at end of file
diff --git a/app-packages/memcached/README.txt b/app-packages/memcached/README.txt
new file mode 100644
index 0000000..eed2954
--- /dev/null
+++ b/app-packages/memcached/README.txt
@@ -0,0 +1,35 @@
+<!---
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+How to create a Slider app package for Memcached?
+
+To create the app package you will need the Memcached tarball copied to a specific location.
+
+Replace the placeholder tarball for JMemcached.
+  cp ~/Downloads/jmemcached-1.0.0.tar package/files/
+  rm package/files/jmemcached-1.0.0.tar.REPLACE
+
+Create a zip package at the root of the package (<slider enlistment>/app-packages/memcached/)
+  zip -r jmemcached-1.0.0.zip .
+
+Verify the content using  
+  unzip -l "$@" jmemcached-1.0.0.zip
+
+While appConfig.json and resources.json are not required for the package they work
+well as the default configuration for Slider apps. So its advisable that when you
+create an application package for Slider, include sample/default resources.json and
+appConfig.json for a minimal Yarn cluster.
diff --git a/app-packages/memcached/appConfig.json b/app-packages/memcached/appConfig.json
new file mode 100644
index 0000000..5f32030
--- /dev/null
+++ b/app-packages/memcached/appConfig.json
@@ -0,0 +1,26 @@
+{
+  "schema": "http://example.org/specification/v2.0.0",
+  "metadata": {
+  },
+  "global": {
+    "application.def": "package/jmemcached-1.0.0.zip",
+    "java_home": "/usr/jdk64/jdk1.7.0_45",
+
+    "site.global.app_user": "yarn",
+    "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/jmemcached-1.0.0",
+    "site.global.pid_file": "${AGENT_WORK_ROOT}/app/run/component.pid",
+
+    "site.global.additional_cp": "/usr/lib/hadoop/lib/*",
+    "site.global.xmx_val": "256m",
+    "site.global.xms_val": "128m",
+    "site.global.memory_val": "200M",
+    "site.global.listen_port": "${MEMCACHED.ALLOCATED_PORT}{DO_NOT_PROPAGATE}"
+  },
+  "components": {
+    "slider-appmaster": {
+      "jvm.heapsize": "256M"
+    },
+    "MEMCACHED": {
+    }
+  }
+}
diff --git a/app-packages/memcached/metainfo.xml b/app-packages/memcached/metainfo.xml
new file mode 100644
index 0000000..525816e
--- /dev/null
+++ b/app-packages/memcached/metainfo.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<metainfo>
+  <schemaVersion>2.0</schemaVersion>
+  <application>
+    <name>MEMCACHED</name>
+    <comment>Memcache is a network accessible key/value storage system, often used as a distributed cache.</comment>
+    <version>1.0.0</version>
+    <exportedConfigs>None</exportedConfigs>
+
+    <components>
+      <component>
+        <name>MEMCACHED</name>
+        <category>MASTER</category>
+        <exports>
+          <export>
+            <name>host_port</name>
+            <value>${THIS_HOST}:${site.global.listen_port}</value>
+          </export>
+        </exports>
+        <commandScript>
+          <script>scripts/memcached.py</script>
+          <scriptType>PYTHON</scriptType>
+        </commandScript>
+      </component>
+    </components>
+
+    <osSpecifics>
+      <osSpecific>
+        <osType>any</osType>
+        <packages>
+          <package>
+            <type>tarball</type>
+            <name>files/jmemcached-1.0.0.tar</name>
+          </package>
+        </packages>
+      </osSpecific>
+    </osSpecifics>
+
+  </application>
+</metainfo>
diff --git a/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE b/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE
new file mode 100644
index 0000000..91a16d9
--- /dev/null
+++ b/app-packages/memcached/package/files/jmemcached-1.0.0.tar.REPLACE
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Replace this file with a tarball which has the structure
+
+jmemcached-1.0.0/
+jmemcached-1.0.0/jmemcached-cli-1.0.0.jar
+jmemcached-1.0.0/jmemcached-core-1.0.0.jar
diff --git a/app-packages/memcached/package/scripts/memcached.py b/app-packages/memcached/package/scripts/memcached.py
new file mode 100644
index 0000000..6e14e86
--- /dev/null
+++ b/app-packages/memcached/package/scripts/memcached.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+
+class Memcached(Script):
+  def install(self, env):
+    self.install_packages(env)
+    pass
+
+  def configure(self, env):
+    import params
+    env.set_params(params)
+
+  def start(self, env):
+    import params
+    env.set_params(params)
+    self.configure(env)
+    process_cmd = format("{java64_home}/bin/java -Xmx{xmx_val} -Xms{xms_val} -classpath {app_root}/*:{additional_cp} com.thimbleware.jmemcached.Main --memory={memory_val} --port={port}")
+
+    Execute(process_cmd,
+        user=params.app_user,
+        logoutput=False,
+        wait_for_finish=False,
+        pid_file=params.pid_file
+    )
+
+  def stop(self, env):
+    import params
+    env.set_params(params)
+
+  def status(self, env):
+    import params
+    env.set_params(params)
+    check_process_status(params.pid_file)
+
+if __name__ == "__main__":
+  Memcached().execute()
diff --git a/app-packages/memcached/package/scripts/params.py b/app-packages/memcached/package/scripts/params.py
new file mode 100644
index 0000000..25b4055
--- /dev/null
+++ b/app-packages/memcached/package/scripts/params.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+app_root = config['configurations']['global']['app_root']
+java64_home = config['hostLevelParams']['java_home']
+app_user = config['configurations']['global']['app_user']
+pid_file = config['configurations']['global']['pid_file']
+
+additional_cp = config['configurations']['global']['additional_cp']
+xmx_val = config['configurations']['global']['xmx_val']
+xms_val = config['configurations']['global']['xms_val']
+memory_val = config['configurations']['global']['memory_val']
+port = config['configurations']['global']['listen_port']
diff --git a/app-packages/memcached/resources.json b/app-packages/memcached/resources.json
new file mode 100644
index 0000000..f0e02ac
--- /dev/null
+++ b/app-packages/memcached/resources.json
@@ -0,0 +1,16 @@
+{
+  "schema" : "http://example.org/specification/v2.0.0",
+  "metadata" : {
+  },
+  "global" : {
+  },
+  "components": {
+    "slider-appmaster": {
+    },
+    "MEMCACHED": {
+      "yarn.role.priority": "1",
+      "yarn.component.instances": "1",
+      "yarn.memory": "256"
+    }
+  }
+}
\ No newline at end of file
diff --git a/app-packages/storm/appConfig.json b/app-packages/storm/appConfig.json
index 6d6aa3a..24078cf 100644
--- a/app-packages/storm/appConfig.json
+++ b/app-packages/storm/appConfig.json
@@ -3,7 +3,7 @@
   "metadata": {
   },
   "global": {
-    "application.def": "/slider/storm_v091.zip",
+    "application.def": "package/storm_v091.zip",
     "config_types": "storm-site",
     "java_home": "/usr/jdk64/jdk1.7.0_45",
     "package_list": "files/apache-storm-0.9.1.2.1.1.0-237.tar.gz",
@@ -68,7 +68,7 @@
     "site.storm-site.logviewer.appender.name": "A1",
     "site.storm-site.nimbus.host": "${NIMBUS_HOST}",
     "site.storm-site.ui.port": "${STORM_UI_SERVER.ALLOCATED_PORT}",
-    "site.storm-site.supervisor.slots.ports": "[${SUPERVISOR.ALLOCATED_PORT}]",
+    "site.storm-site.supervisor.slots.ports": "[${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE},${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE}]",
     "site.storm-site.nimbus.file.copy.expiration.secs": "600",
     "site.storm-site.supervisor.monitor.frequency.secs": "3",
     "site.storm-site.transactional.zookeeper.servers": "null",
@@ -105,7 +105,7 @@
     "site.storm-site.topology.trident.batch.emit.interval.millis": "500",
     "site.storm-site.topology.builtin.metrics.bucket.size.secs": "60",
     "site.storm-site.storm.thrift.transport": "backtype.storm.security.auth.SimpleTransportPlugin",
-    "site.storm-site.logviewer.port": "0",
+    "site.storm-site.logviewer.port": "${SUPERVISOR.ALLOCATED_PORT}{DO_NOT_PROPAGATE}",
     "site.storm-site.topology.debug": "false"
   },
   "components": {
diff --git a/app-packages/storm/metainfo.xml b/app-packages/storm/metainfo.xml
index 7edd794..dbe8549 100644
--- a/app-packages/storm/metainfo.xml
+++ b/app-packages/storm/metainfo.xml
@@ -22,27 +22,32 @@
     <name>STORM</name>
     <comment>Apache Hadoop Stream processing framework</comment>
     <version>0.9.1.2.1</version>
+    <exportedConfigs>storm-site</exportedConfigs>
 
     <exportGroups>
       <exportGroup>
         <name>QuickLinks</name>
         <exports>
           <export>
-            <name>org.apache.slider.jmx</name>
+            <name>app.jmx</name>
             <value>http://${STORM_REST_API_HOST}:${site.global.rest_api_port}/api/cluster/summary</value>
           </export>
           <export>
-            <name>org.apache.slider.monitor</name>
+            <name>app.monitor</name>
             <value>http://${STORM_UI_SERVER_HOST}:${site.storm-site.ui.port}</value>
           </export>
           <export>
-            <name>org.apache.slider.metrics</name>
+            <name>app.metrics</name>
             <value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
           </export>
           <export>
-            <name>org.apache.slider.ganglia</name>
+            <name>ganglia.ui</name>
             <value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
           </export>
+          <export>
+            <name>nimbus.host_port</name>
+            <value>http://${NIMBUS_HOST}:${site.storm-site.nimbus.thrift.port}</value>
+          </export>
         </exports>
       </exportGroup>
     </exportGroups>
@@ -76,6 +81,8 @@
       <component>
         <name>NIMBUS</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <appExports>QuickLinks-nimbus.host_port,QuickLinks-ganglia.ui,QuickLinks-app.metrics</appExports>
         <commandScript>
           <script>scripts/nimbus.py</script>
           <scriptType>PYTHON</scriptType>
@@ -86,6 +93,8 @@
       <component>
         <name>STORM_REST_API</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <appExports>QuickLinks-app.jmx</appExports>
         <commandScript>
           <script>scripts/rest_api.py</script>
           <scriptType>PYTHON</scriptType>
@@ -96,6 +105,13 @@
       <component>
         <name>SUPERVISOR</name>
         <category>SLAVE</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <componentExports>
+          <componentExport>
+            <name>log_viewer_port</name>
+            <value>${THIS_HOST}:${site.storm-site.logviewer.port}</value>
+          </componentExport>
+        </componentExports>
         <commandScript>
           <script>scripts/supervisor.py</script>
           <scriptType>PYTHON</scriptType>
@@ -107,6 +123,8 @@
         <name>STORM_UI_SERVER</name>
         <category>MASTER</category>
         <publishConfig>true</publishConfig>
+        <appExports>QuickLinks-app.monitor</appExports>
+        <autoStartOnFailure>true</autoStartOnFailure>
         <commandScript>
           <script>scripts/ui_server.py</script>
           <scriptType>PYTHON</scriptType>
@@ -117,6 +135,7 @@
       <component>
         <name>DRPC_SERVER</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
         <commandScript>
           <script>scripts/drpc_server.py</script>
           <scriptType>PYTHON</scriptType>
@@ -136,10 +155,5 @@
         </packages>
       </osSpecific>
     </osSpecifics>
-
-    <configuration-dependencies>
-      <config-type>storm-site</config-type>
-      <config-type>global</config-type>
-    </configuration-dependencies>
   </application>
 </metainfo>
diff --git a/app-packages/storm/package/scripts/service.py b/app-packages/storm/package/scripts/service.py
index 10fa5b9..13fcef2 100644
--- a/app-packages/storm/package/scripts/service.py
+++ b/app-packages/storm/package/scripts/service.py
@@ -22,7 +22,9 @@
 from resource_management import *
 import time
 
-
+"""
+Slider package uses jps as pgrep does not list the whole process start command
+"""
 def service(
     name,
     action='start'):
@@ -30,25 +32,25 @@
   import status_params
 
   pid_file = status_params.pid_files[name]
+  container_id = status_params.container_id
   no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")
+
   jps_path = format("{java64_home}/bin/jps")
-  grep_and_awk = "| grep -v grep | awk '{print $1}'"
+  grep_and_awk = format("| grep {container_id}") + " | awk '{print $1}'"
 
   if name == 'ui':
-    #process_cmd = "^java.+backtype.storm.ui.core$"
-    pid_chk_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.ui.core\" {grep_and_awk}  > {pid_file}")
+    crt_pid_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.ui.core\" {grep_and_awk}  > {pid_file}")
   elif name == "rest_api":
-    process_cmd = format("{java64_home}/bin/java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
-    crt_pid_cmd = format("pgrep -f \"{process_cmd}\" && pgrep -f \"{process_cmd}\" > {pid_file}")
+    rest_process_cmd = format("{java64_home}/bin/java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
+    crt_pid_cmd = format("pgrep -f \"{rest_process_cmd}\" > {pid_file}")
   else:
-    #process_cmd = format("^java.+backtype.storm.daemon.{name}$")
-    pid_chk_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.daemon.{name}\" {grep_and_awk}  > {pid_file}")
+    crt_pid_cmd = format("{jps_path} -vl | grep \"^[0-9 ]*backtype.storm.daemon.{name}\" {grep_and_awk}  > {pid_file}")
 
   if action == "start":
     if name == "rest_api":
-      cmd = format("{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log")
+      cmd = format("{rest_process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log")
     else:
-      cmd = format("env JAVA_HOME={java64_home} PATH=$PATH:{java64_home}/bin STORM_BASE_DIR={app_root} STORM_CONF_DIR={conf_dir} {storm_bin} {name}")
+      cmd = format("env JAVA_HOME={java64_home} PATH=$PATH:{java64_home}/bin STORM_BASE_DIR={app_root} STORM_CONF_DIR={conf_dir} {storm_bin} {name} > {log_dir}/{name}.out 2>&1")
 
     Execute(cmd,
             not_if=no_op_test,
@@ -67,7 +69,7 @@
     else:
       content = None
       for i in xrange(12):
-        Execute(pid_chk_cmd,
+        Execute(crt_pid_cmd,
                 user=params.storm_user,
                 logoutput=True
         )
diff --git a/app-packages/storm/package/scripts/status_params.py b/app-packages/storm/package/scripts/status_params.py
index eab83cf..5907446 100644
--- a/app-packages/storm/package/scripts/status_params.py
+++ b/app-packages/storm/package/scripts/status_params.py
@@ -21,6 +21,7 @@
 
 config = Script.get_config()
 
+container_id = config['configurations']['global']['app_container_id']
 pid_dir = config['configurations']['global']['app_pid_dir']
 pid_nimbus = format("{pid_dir}/nimbus.pid")
 pid_supervisor = format("{pid_dir}/supervisor.pid")
diff --git a/pom.xml b/pom.xml
index 496951a..9849cdf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,7 +19,7 @@
   <groupId>org.apache.slider</groupId>
   <artifactId>slider</artifactId>
   <name>Slider</name>
-  <version>0.40</version>
+  <version>0.50.0-incubating</version>
   <packaging>pom</packaging>
 
   <description>
@@ -45,7 +45,6 @@
     <module>slider-providers/hbase/hbase-funtests</module>
     <module>slider-providers/accumulo/slider-accumulo-provider</module>
     <module>slider-providers/accumulo/accumulo-funtests</module>
-    <module>slider-install</module>
   </modules>
 
   <licenses>
@@ -94,11 +93,13 @@
 
   <properties>
 
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
 
     <!-- 
     Java versions
     -->
-    <project.java.src.version>1.7</project.java.src.version>
+    <project.java.src.version>1.6</project.java.src.version>
     <enforced.java.version>${project.java.src.version}</enforced.java.version>
     <groovy.version>2.2.2</groovy.version>
     
@@ -106,6 +107,7 @@
     test options
     -->
 
+    <slider.conf.dir>${project.basedir}/src/test/clusters/offline/slider</slider.conf.dir>
     <test.forkedProcessTimeoutInSeconds>18000</test.forkedProcessTimeoutInSeconds>
     <test.argLine>-Xmx1024m -XX:+HeapDumpOnOutOfMemoryError</test.argLine>
     <test.reuseForks>false</test.reuseForks>
@@ -116,7 +118,7 @@
     <!--
     core artifacts
     -->
-    <hadoop.version>2.4.0</hadoop.version>
+    <hadoop.version>2.4.1</hadoop.version>
 
     <hbase.version>0.98.4-hadoop2</hbase.version>
     <accumulo.version>1.6.0</accumulo.version>
@@ -142,11 +144,16 @@
 
     <jackson.version>1.9.13</jackson.version>
     <jcommander.version>1.30</jcommander.version>
+
+    <jetty.version>6.1.26</jetty.version>
     <jersey.version>1.9</jersey.version>
     <servlet-api.version>2.5</servlet-api.version>
     <jsr311-api.version>1.1.1</jsr311-api.version>
+    <jaxb-api.version>2.2.7</jaxb-api.version>
+
     <junit.version>4.11</junit.version>
     <log4j.version>1.2.17</log4j.version>
+    <metrics.version>3.0.1</metrics.version>
     <mockito.version>1.8.5</mockito.version>
 
     <!-- ProtocolBuffer version, used to verify the protoc version and -->
@@ -217,9 +224,42 @@
   </pluginRepositories>
   
   <build>
-    
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>${maven-compiler-plugin.version}</version>
+          <configuration>
+            <compilerId>groovy-eclipse-compiler</compilerId>
+            <source>${project.java.src.version}</source>
+            <target>${project.java.src.version}</target>
+            <!-- set verbose to be true if you want lots of uninteresting messages -->
+            <!-- <verbose>true</verbose> -->
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
     <plugins>
       <plugin>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <version>${maven-enforcer-plugin.version}</version>
+        <executions>
+          <execution>
+            <id>enforce-java</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <requireJavaVersion>
+                  <version>[${project.java.src.version},)</version>
+                </requireJavaVersion>
+              </rules>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-source-plugin</artifactId>
         <version>${maven-source-plugin.version}</version>
@@ -1081,6 +1121,12 @@
       </dependency>
 
       <dependency>
+        <groupId>javax.xml.bind</groupId>
+        <artifactId>jaxb-api</artifactId>
+        <version>${jaxb-api.version}</version>
+      </dependency>
+
+      <dependency>
         <groupId>com.sun.jersey</groupId>
         <artifactId>jersey-client</artifactId>
         <version>${jersey.version}</version>
@@ -1140,6 +1186,11 @@
         <version>${jersey.version}</version>
       </dependency>
 
+      <dependency>
+        <groupId>com.codahale.metrics</groupId>
+        <artifactId>metrics-core</artifactId>
+        <version>${metrics.version}</version>
+      </dependency>
 
       <!-- ======================================================== -->
       <!-- Mocking -->
@@ -1162,9 +1213,19 @@
       <!-- ======================================================== -->
 
       <dependency>
-	      <groupId>org.mortbay.jetty</groupId>
-	      <artifactId>jetty-sslengine</artifactId>
-        <version>6.1.26</version>
+        <groupId>org.mortbay.jetty</groupId>
+        <artifactId>jetty</artifactId>
+        <version>${jetty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.mortbay.jetty</groupId>
+        <artifactId>jetty-util</artifactId>
+        <version>${jetty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.mortbay.jetty</groupId>
+        <artifactId>jetty-sslengine</artifactId>
+        <version>${jetty.version}</version>
       </dependency>
 
     </dependencies>
@@ -1230,7 +1291,7 @@
       <!-- hadoop branch-2 builds  -->
       <id>branch-2</id>
       <properties>
-        <hadoop.version>2.5.0-SNAPSHOT</hadoop.version>
+        <hadoop.version>2.6.0-SNAPSHOT</hadoop.version>
       </properties>
     </profile>
     
diff --git a/slider-agent/conf/agent.ini b/slider-agent/conf/agent.ini
index b52bec9..7b9d57d 100644
--- a/slider-agent/conf/agent.ini
+++ b/slider-agent/conf/agent.ini
@@ -28,11 +28,11 @@
 app_dbg_cmd=
 debug_mode_enabled=true
 
-app_task_dir=app/command-log
-app_log_dir=app/log
+app_task_dir=.
+app_log_dir=.
 app_tmp_dir=app/tmp
 
-log_dir=infra/log
+log_dir=.
 run_dir=infra/run
 version_file=infra/version
 
diff --git a/slider-agent/pom.xml b/slider-agent/pom.xml
index 7a3b447..d670f81 100644
--- a/slider-agent/pom.xml
+++ b/slider-agent/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>slider-agent</artifactId>
@@ -57,10 +57,6 @@
       </plugin>
       
       <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.0</version>
-      </plugin>
-      <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>exec-maven-plugin</artifactId>
         <version>1.2</version>
@@ -73,7 +69,7 @@
                 <argument>unitTests.py</argument>
               </arguments>
               <environmentVariables>
-                <PYTHONPATH>${project.basedir}/src/main/python/jinja2:${project.basedir}/src/test/python:${project.basedir}/src/main/python:${project.basedir}/src/main/python/agent:${project.basedir}/src/main/python/resource_management:${project.basedir}/src/test/python/agent:${project.basedir}/src/test/python/resource_management</PYTHONPATH>
+                <PYTHONPATH>${project.basedir}/src/main/python/jinja2:${project.basedir}/src/test/python:${project.basedir}/src/main/python:${project.basedir}/src/main/python/agent:${project.basedir}/src/main/python/resource_management:${project.basedir}/src/test/python/agent:${project.basedir}/src/test/python/resource_management:${project.basedir}/src/main/python/kazoo</PYTHONPATH>
               </environmentVariables>
               <skip>${skipTests}</skip>
             </configuration>
@@ -106,6 +102,8 @@
             <exclude>src/main/python/jinja2/**</exclude>
             <!-- mock files (BSD license) -->
             <exclude>src/test/python/mock/**</exclude>
+            <!-- kazoo files (Apache License, Version 2.0) -->
+            <exclude>src/main/python/kazoo/**</exclude>
           </excludes>
         </configuration>
       </plugin>
diff --git a/slider-agent/src/main/python/agent/ActionQueue.py b/slider-agent/src/main/python/agent/ActionQueue.py
index d4d8bc2..4c45a76 100644
--- a/slider-agent/src/main/python/agent/ActionQueue.py
+++ b/slider-agent/src/main/python/agent/ActionQueue.py
@@ -49,6 +49,7 @@
   FAILED_STATUS = 'FAILED'
 
   STORE_APPLIED_CONFIG = 'record_config'
+  AUTO_RESTART = 'auto_restart'
 
   def __init__(self, config, controller):
     super(ActionQueue, self).__init__()
@@ -120,6 +121,10 @@
     logger.debug(pprint.pformat(command))
 
     taskId = command['taskId']
+
+    # if auto-generated then do not report the result
+    reportResult = CommandStatusDict.shouldReportResult(command)
+
     # Preparing 'IN_PROGRESS' report
     in_progress_status = self.commandStatuses.generate_report_template(command)
     in_progress_status.update({
@@ -127,12 +132,19 @@
       'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
       'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(
         taskId) + '.json',
-      'status': self.IN_PROGRESS_STATUS
+      'status': self.IN_PROGRESS_STATUS,
+      'reportResult': reportResult
     })
-    self.commandStatuses.put_command_status(command, in_progress_status)
+    self.commandStatuses.put_command_status(command, in_progress_status, reportResult)
+
     store_config = False
     if ActionQueue.STORE_APPLIED_CONFIG in command['commandParams']:
       store_config = 'true' == command['commandParams'][ActionQueue.STORE_APPLIED_CONFIG]
+    store_command = False
+    if 'roleParams' in command and ActionQueue.AUTO_RESTART in command['roleParams']:
+      logger.info("Component has indicated auto-restart. Saving details from START command.")
+      store_command = 'true' == command['roleParams'][ActionQueue.AUTO_RESTART]
+
 
     # running command
     commandresult = self.customServiceOrchestrator.runCommand(command,
@@ -141,7 +153,7 @@
                                                               in_progress_status[
                                                                 'tmperr'],
                                                               True,
-                                                              store_config)
+                                                              store_config or store_command)
     # dumping results
     status = self.COMPLETED_STATUS
     if commandresult[Constants.EXIT_CODE] != 0:
@@ -152,6 +164,7 @@
       'stderr': commandresult['stderr'],
       Constants.EXIT_CODE: commandresult[Constants.EXIT_CODE],
       'status': status,
+      'reportResult': reportResult
     })
     if roleResult['stdout'] == '':
       roleResult['stdout'] = 'None'
@@ -170,7 +183,7 @@
         roleResult['allocatedPorts'] = commandresult[Constants.ALLOCATED_PORTS]
       if Constants.FOLDERS in commandresult:
         roleResult['folders'] = commandresult[Constants.FOLDERS]
-    self.commandStatuses.put_command_status(command, roleResult)
+    self.commandStatuses.put_command_status(command, roleResult, reportResult)
 
   # Store action result to agent response queue
   def result(self):
@@ -184,10 +197,7 @@
       cluster = command['clusterName']
       service = command['serviceName']
       component = command['componentName']
-      reportResult = True
-      if 'auto_generated' in command:
-        reportResult = not command['auto_generated']
-
+      reportResult = CommandStatusDict.shouldReportResult(command)
       component_status = self.customServiceOrchestrator.requestComponentStatus(command)
 
       result = {"componentName": component,
diff --git a/slider-agent/src/main/python/agent/AgentConfig.py b/slider-agent/src/main/python/agent/AgentConfig.py
index 16b924c..e45ba23 100644
--- a/slider-agent/src/main/python/agent/AgentConfig.py
+++ b/slider-agent/src/main/python/agent/AgentConfig.py
@@ -22,6 +22,7 @@
 import StringIO
 import os
 import logging
+import posixpath
 
 logger = logging.getLogger()
 
@@ -32,6 +33,8 @@
 hostname=localhost
 port=8440
 secured_port=8441
+zk_quorum=localhost:2181
+zk_reg_path=/register/org-apache-slider/cl1
 check_path=/ws/v1/slider/agents/
 register_path=/ws/v1/slider/agents/{name}/register
 heartbeat_path=/ws/v1/slider/agents/{name}/heartbeat
@@ -43,11 +46,11 @@
 app_dbg_cmd=
 debug_mode_enabled=true
 
-app_task_dir=app/command-log
-app_log_dir=app/log
+app_task_dir=.
+app_log_dir=.
 app_tmp_dir=app/tmp
 
-log_dir=infra/log
+log_dir=.
 run_dir=infra/run
 version_file=infra/version
 
@@ -141,7 +144,7 @@
       if name in AgentConfig.FOLDER_MAPPING and AgentConfig.FOLDER_MAPPING[
         name] == "LOG":
         root_folder_to_use = self.logroot
-      return os.path.join(root_folder_to_use, relativePath)
+      return posixpath.join(root_folder_to_use, relativePath)
     else:
       return relativePath
 
diff --git a/slider-agent/src/main/python/agent/CommandStatusDict.py b/slider-agent/src/main/python/agent/CommandStatusDict.py
index 9261e29..bcbce9e 100644
--- a/slider-agent/src/main/python/agent/CommandStatusDict.py
+++ b/slider-agent/src/main/python/agent/CommandStatusDict.py
@@ -114,12 +114,14 @@
     grep = Grep()
     output = grep.tail(tmpout, Grep.OUTPUT_LAST_LINES)
     inprogress = self.generate_report_template(command)
+    reportResult = CommandStatusDict.shouldReportResult(command)
     inprogress.update({
       'stdout': grep.filterMarkup(output),
       'stderr': tmperr,
       'structuredOut': tmpstructuredout,
       Constants.EXIT_CODE: 777,
       'status': ActionQueue.IN_PROGRESS_STATUS,
+      'reportResult': reportResult
     })
     return inprogress
 
@@ -140,3 +142,6 @@
     return stub
 
 
+  @staticmethod
+  def shouldReportResult(command):
+    return not (Constants.AUTO_GENERATED in command and command[Constants.AUTO_GENERATED])
diff --git a/slider-agent/src/main/python/agent/Constants.py b/slider-agent/src/main/python/agent/Constants.py
index 88cd564..2975266 100644
--- a/slider-agent/src/main/python/agent/Constants.py
+++ b/slider-agent/src/main/python/agent/Constants.py
@@ -29,4 +29,7 @@
 AGENT_LOG_ROOT = "AGENT_LOG_ROOT"
 DO_NOT_REGISTER = "DO_NOT_REGISTER"
 DO_NOT_HEARTBEAT = "DO_NOT_HEARTBEAT"
-DO_NOT_HEARTBEAT_AFTER_ = "DO_NOT_HEARTBEAT_AFTER_"
\ No newline at end of file
+DO_NOT_HEARTBEAT_AFTER_ = "DO_NOT_HEARTBEAT_AFTER_"
+ZK_QUORUM="zk_quorum"
+ZK_REG_PATH="zk_reg_path"
+AUTO_GENERATED="auto_generated"
diff --git a/slider-agent/src/main/python/agent/Controller.py b/slider-agent/src/main/python/agent/Controller.py
index 92e9086..1e27efa 100644
--- a/slider-agent/src/main/python/agent/Controller.py
+++ b/slider-agent/src/main/python/agent/Controller.py
@@ -34,6 +34,7 @@
 from Register import Register
 from ActionQueue import ActionQueue
 from NetUtil import NetUtil
+from Registry import Registry
 import ssl
 import ProcessHelper
 import Constants
@@ -43,7 +44,12 @@
 logger = logging.getLogger()
 
 AGENT_AUTO_RESTART_EXIT_CODE = 77
+HEART_BEAT_RETRY_THRESHOLD = 2
 
+WS_AGENT_CONTEXT_ROOT = '/ws'
+SLIDER_PATH_AGENTS = WS_AGENT_CONTEXT_ROOT + '/v1/slider/agents/'
+SLIDER_REL_PATH_REGISTER = '/register'
+SLIDER_REL_PATH_HEARTBEAT = '/heartbeat'
 
 class State:
   INIT, INSTALLING, INSTALLED, STARTING, STARTED, FAILED = range(6)
@@ -57,13 +63,12 @@
     self.safeMode = True
     self.credential = None
     self.config = config
-    self.hostname = config.getLabel()
-    server_url = 'https://' + config.get(AgentConfig.SERVER_SECTION,
-                                        'hostname') + \
-                 ':' + config.get(AgentConfig.SERVER_SECTION,
-                                  'secured_port')
-    self.registerUrl = server_url + '/ws/v1/slider/agents/' + self.hostname + '/register'
-    self.heartbeatUrl = server_url + '/ws/v1/slider/agents/' + self.hostname + '/heartbeat'
+    self.label = config.getLabel()
+    self.hostname = config.get(AgentConfig.SERVER_SECTION, 'hostname')
+    self.secured_port = config.get(AgentConfig.SERVER_SECTION, 'secured_port')
+    self.server_url = 'https://' + self.hostname + ':' + self.secured_port
+    self.registerUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_REGISTER
+    self.heartbeatUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_HEARTBEAT
     self.netutil = NetUtil()
     self.responseId = -1
     self.repeatRegistration = False
@@ -80,6 +85,8 @@
     self.componentActualState = State.INIT
     self.statusCommand = None
     self.failureCount = 0
+    self.heartBeatRetryCount = 0
+    self.autoRestart = False
 
 
   def __del__(self):
@@ -111,7 +118,11 @@
 
     while not self.isRegistered:
       try:
-        data = json.dumps(self.register.build(id))
+        data = json.dumps(self.register.build(
+          self.componentActualState,
+          self.componentExpectedState,
+          self.actionQueue.customServiceOrchestrator.allocated_ports,
+          id))
         logger.info("Registering with the server at " + self.registerUrl +
                     " with data " + pprint.pformat(data))
         response = self.sendRequest(self.registerUrl, data)
@@ -204,8 +215,8 @@
       try:
         if not retry:
           data = json.dumps(
-            self.heartbeat.build(commandResult, self.responseId,
-                                 self.hasMappedComponents))
+            self.heartbeat.build(commandResult,
+                                 self.responseId, self.hasMappedComponents))
           self.updateStateBasedOnResult(commandResult)
           logger.debug("Sending request: " + data)
           pass
@@ -218,6 +229,12 @@
 
         serverId = int(response['responseId'])
 
+        restartEnabled = False
+        if 'restartEnabled' in response:
+          restartEnabled = response['restartEnabled']
+          if restartEnabled:
+            logger.info("Component auto-restart is enabled.")
+
         if 'hasMappedComponents' in response.keys():
           self.hasMappedComponents = response['hasMappedComponents'] != False
 
@@ -231,7 +248,8 @@
             return
 
         if serverId != self.responseId + 1:
-          logger.error("Error in responseId sequence - restarting")
+          logger.error("Error in responseId sequence expected " + str(self.responseId + 1)
+                       + " but got " + str(serverId) + " - restarting")
           self.restartAgent()
         else:
           self.responseId = serverId
@@ -250,6 +268,19 @@
           logger.info("No commands sent from the Server.")
           pass
 
+        # Add a start command
+        if self.componentActualState == State.FAILED and \
+                self.componentExpectedState == State.STARTED and restartEnabled:
+          stored_command = self.actionQueue.customServiceOrchestrator.stored_command
+          if len(stored_command) > 0:
+            auto_start_command = self.create_start_command(stored_command)
+            if auto_start_command:
+              logger.info("Automatically adding a start command.")
+              logger.debug("Auto start command: " + pprint.pformat(auto_start_command))
+              self.updateStateBasedOnCommand([auto_start_command], False)
+              self.addToQueue([auto_start_command])
+          pass
+
         # Add a status command
         if (self.componentActualState != State.STARTING and \
                 self.componentExpectedState == State.STARTED) and \
@@ -285,9 +316,33 @@
             print(
               "Server certificate verify failed. Did you regenerate server certificate?")
             certVerifFailed = True
+        self.heartBeatRetryCount += 1
+        logger.error(
+          "Heartbeat retry count = %d" % (self.heartBeatRetryCount))
+        # Re-read the ZK registry in case the AM was restarted and came up with
+        # a new host/port, but do this only after the heartbeat retry count
+        # crosses the threshold
+        if self.heartBeatRetryCount > HEART_BEAT_RETRY_THRESHOLD:
+          self.isRegistered = False
+          self.repeatRegistration = True
+          self.heartBeatRetryCount = 0
+          self.cachedconnect = None # Previous connection is broken now
+          zk_quorum = self.config.get(AgentConfig.SERVER_SECTION, Constants.ZK_QUORUM)
+          zk_reg_path = self.config.get(AgentConfig.SERVER_SECTION, Constants.ZK_REG_PATH)
+          registry = Registry(zk_quorum, zk_reg_path)
+          amHost, amSecuredPort = registry.readAMHostPort()
+          logger.info("Read from ZK registry: AM host = %s, AM secured port = %s" % (amHost, amSecuredPort))
+          self.hostname = amHost
+          self.secured_port = amSecuredPort
+          self.config.set(AgentConfig.SERVER_SECTION, "hostname", self.hostname)
+          self.config.set(AgentConfig.SERVER_SECTION, "secured_port", self.secured_port)
+          self.server_url = 'https://' + self.hostname + ':' + self.secured_port
+          self.registerUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_REGISTER
+          self.heartbeatUrl = self.server_url + SLIDER_PATH_AGENTS + self.label + SLIDER_REL_PATH_HEARTBEAT
+          return
         self.cachedconnect = None # Previous connection is broken now
         retry = True
-        # Sleep for some time
+      # Sleep for some time
       timeout = self.netutil.HEARTBEAT_IDDLE_INTERVAL_SEC \
                 - self.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS
       self.heartbeat_wait_event.wait(timeout=timeout)
@@ -297,13 +352,25 @@
     pass
     logger.info("Controller stopped heart-beating.")
 
-  def updateStateBasedOnCommand(self, commands):
+
+  def create_start_command(self, stored_command):
+    taskId = int(stored_command['taskId'])
+    taskId = taskId + 1
+    stored_command['taskId'] = taskId
+    stored_command['commandId'] = "{0}-1".format(taskId)
+    stored_command[Constants.AUTO_GENERATED] = True
+    return stored_command
+    pass
+
+
+  def updateStateBasedOnCommand(self, commands, createStatus=True):
     for command in commands:
       if command["roleCommand"] == "START":
         self.componentExpectedState = State.STARTED
         self.componentActualState = State.STARTING
         self.failureCount = 0
-        self.statusCommand = self.createStatusCommand(command)
+        if createStatus:
+          self.statusCommand = self.createStatusCommand(command)
 
       if command["roleCommand"] == "INSTALL":
         self.componentExpectedState = State.INSTALLED
@@ -329,6 +396,7 @@
 
       if "healthStatus" in commandResult:
         if commandResult["healthStatus"] == "INSTALLED":
+          # Mark it FAILED as it's a failure remedied by auto-start or container restart
           self.componentActualState = State.FAILED
           self.failureCount += 1
           self.logStates()
@@ -357,9 +425,9 @@
     statusCommand["hostLevelParams"] = command["hostLevelParams"]
     statusCommand["serviceName"] = command["serviceName"]
     statusCommand["taskId"] = "status"
-    statusCommand['auto_generated'] = True
-    return statusCommand
+    statusCommand[Constants.AUTO_GENERATED] = True
     logger.info("Status command: " + pprint.pformat(statusCommand))
+    return statusCommand
     pass
 
 
diff --git a/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py b/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
index 6296033..15f1664 100644
--- a/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
+++ b/slider-agent/src/main/python/agent/CustomServiceOrchestrator.py
@@ -24,6 +24,8 @@
 import pprint
 import sys
 import socket
+import posixpath
+import platform
 from AgentConfig import AgentConfig
 from AgentException import AgentException
 from PythonExecutor import PythonExecutor
@@ -49,25 +51,26 @@
     self.config = config
     self.tmp_dir = config.getResolvedPath(AgentConfig.APP_TASK_DIR)
     self.python_executor = PythonExecutor(self.tmp_dir, config)
-    self.status_commands_stdout = os.path.join(self.tmp_dir,
-                                               'status_command_stdout.txt')
-    self.status_commands_stderr = os.path.join(self.tmp_dir,
-                                               'status_command_stderr.txt')
+    self.status_commands_stdout = os.path.realpath(posixpath.join(self.tmp_dir,
+                                                                  'status_command_stdout.txt'))
+    self.status_commands_stderr = os.path.realpath(posixpath.join(self.tmp_dir,
+                                                                  'status_command_stderr.txt'))
     self.public_fqdn = hostname.public_hostname()
-    self.applied_configs = {}
+    self.stored_command = {}
+    self.allocated_ports = {}
     # Clean up old status command files if any
     try:
       os.unlink(self.status_commands_stdout)
       os.unlink(self.status_commands_stderr)
     except OSError:
       pass # Ignore fail
-    self.base_dir = os.path.join(
-      config.getResolvedPath(AgentConfig.APP_PACKAGE_DIR), "package")
+    self.base_dir = os.path.realpath(posixpath.join(
+      config.getResolvedPath(AgentConfig.APP_PACKAGE_DIR), "package"))
 
 
   def runCommand(self, command, tmpoutfile, tmperrfile,
-                 override_output_files=True, store_config=False):
-    allocated_port = {}
+                 override_output_files=True, store_command=False):
+    allocated_ports = {}
     try:
       script_type = command['commandParams']['script_type']
       script = command['commandParams']['script']
@@ -78,13 +81,13 @@
       script_path = self.resolve_script_path(self.base_dir, script, script_type)
       script_tuple = (script_path, self.base_dir)
 
-      tmpstrucoutfile = os.path.join(self.tmp_dir,
-                                     "structured-out-{0}.json".format(task_id))
+      tmpstrucoutfile = os.path.realpath(posixpath.join(self.tmp_dir,
+                                                        "structured-out-{0}.json".format(task_id)))
       if script_type.upper() != self.SCRIPT_TYPE_PYTHON:
       # We don't support anything else yet
         message = "Unknown script type {0}".format(script_type)
         raise AgentException(message)
-      json_path = self.dump_command_to_json(command, allocated_port, store_config)
+      json_path = self.dump_command_to_json(command, allocated_ports, store_command)
       py_file_list = [script_tuple]
       # filter None values
       filtered_py_file_list = [i for i in py_file_list if i]
@@ -94,11 +97,15 @@
       ret = None
       for py_file, current_base_dir in filtered_py_file_list:
         script_params = [command_name, json_path, current_base_dir]
-        python_paths = [os.path.join(self.config.getWorkRootPath(),
-                                     "infra/agent/slider-agent/jinja2"),
-                        os.path.join(self.config.getWorkRootPath(),
-                                     "infra/agent/slider-agent")]
-        environment_vars = [("PYTHONPATH", ":".join(python_paths))]
+        python_paths = [os.path.realpath(posixpath.join(self.config.getWorkRootPath(),
+                                                        "infra", "agent", "slider-agent", "jinja2")),
+                        os.path.realpath(posixpath.join(self.config.getWorkRootPath(),
+                                                        "infra", "agent", "slider-agent"))]
+        if platform.system() != "Windows":
+          environment_vars = [("PYTHONPATH", ":".join(python_paths))]
+        else:
+          environment_vars = [("PYTHONPATH", ";".join(python_paths))]
+
         ret = self.python_executor.run_file(py_file, script_params,
                                             tmpoutfile, tmperrfile, timeout,
                                             tmpstrucoutfile,
@@ -126,13 +133,14 @@
       }
 
     if Constants.EXIT_CODE in ret and ret[Constants.EXIT_CODE] == 0:
-      ret[Constants.ALLOCATED_PORTS] = allocated_port
+      ret[Constants.ALLOCATED_PORTS] = allocated_ports
+      self.allocated_ports = allocated_ports
 
     # Irrespective of the outcome report the folder paths
     if command_name == 'INSTALL':
       ret[Constants.FOLDERS] = {
-        Constants.AGENT_LOG_ROOT : self.config.getLogPath(),
-        Constants.AGENT_WORK_ROOT : self.config.getWorkRootPath()
+        Constants.AGENT_LOG_ROOT: self.config.getLogPath(),
+        Constants.AGENT_WORK_ROOT: self.config.getWorkRootPath()
       }
     return ret
 
@@ -141,29 +149,35 @@
     """
     Encapsulates logic of script location determination.
     """
-    path = os.path.join(base_dir, script)
+    path = os.path.realpath(posixpath.join(base_dir, script))
     if not os.path.exists(path):
       message = "Script {0} does not exist".format(path)
       raise AgentException(message)
     return path
 
   def getConfig(self, command):
-    if 'commandParams' in command and 'config_type' in command['commandParams']:
-      config_type = command['commandParams']['config_type']
-      logger.info("Requesting applied config for type {0}".format(config_type))
-      if config_type in self.applied_configs:
-        return {
-          'configurations': {config_type: self.applied_configs[config_type]}
-        }
+    if 'configurations' in self.stored_command:
+      if 'commandParams' in command and 'config_type' in command['commandParams']:
+        config_type = command['commandParams']['config_type']
+        logger.info("Requesting applied config for type {0}".format(config_type))
+        if config_type in self.stored_command['configurations']:
+          return {
+            'configurations': {config_type: self.stored_command['configurations'][config_type]}
+          }
+        else:
+          return {
+            'configurations': {}
+          }
+        pass
       else:
+        logger.info("Requesting all applied config.")
         return {
-          'configurations': {}
+          'configurations': self.stored_command['configurations']
         }
       pass
     else:
-      logger.info("Requesting all applied config.")
       return {
-        'configurations': self.applied_configs
+        'configurations': {}
       }
     pass
 
@@ -178,7 +192,7 @@
       override_output_files = False
 
     if command['roleCommand'] == "GET_CONFIG":
-      return self.getConfig(command)
+     return self.getConfig(command)
 
     else:
       res = self.runCommand(command, self.status_commands_stdout,
@@ -192,7 +206,7 @@
       return res
     pass
 
-  def dump_command_to_json(self, command, allocated_ports, store_config=False):
+  def dump_command_to_json(self, command, allocated_ports, store_command=False):
     """
     Converts command to json file and returns file path
     """
@@ -207,15 +221,15 @@
     if command_type == ActionQueue.STATUS_COMMAND:
       # These files are frequently created, thats why we don't
       # store them all, but only the latest one
-      file_path = os.path.join(self.tmp_dir, "status_command.json")
+      file_path = os.path.realpath(posixpath.join(self.tmp_dir, "status_command.json"))
     else:
       task_id = command['taskId']
-      file_path = os.path.join(self.tmp_dir, "command-{0}.json".format(task_id))
+      file_path = os.path.realpath(posixpath.join(self.tmp_dir, "command-{0}.json".format(task_id)))
       # Json may contain passwords, that's why we need proper permissions
     if os.path.isfile(file_path):
       os.unlink(file_path)
 
-    self.finalize_command(command, store_config, allocated_ports)
+    self.finalize_command(command, store_command, allocated_ports)
 
     with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT,
                            0600), 'w') as f:
@@ -227,12 +241,17 @@
   patch content
   ${AGENT_WORK_ROOT} -> AgentConfig.getWorkRootPath()
   ${AGENT_LOG_ROOT} -> AgentConfig.getLogPath()
+  ALLOCATED_PORT is a hint to allocate port. It works as follows:
+  It is of the form {component_name.ALLOCATED_PORT}[{DEFAULT_default_port}][{DO_NOT_PROPAGATE}]
+  Either a port gets allocated or if not then just set the value to "0"
   """
 
-  def finalize_command(self, command, store_config, allocated_ports):
+  def finalize_command(self, command, store_command, allocated_ports):
     component = command['componentName']
-    allocated_port_format = "${{{0}.ALLOCATED_PORT}}"
-    port_allocation_req = allocated_port_format.format(component)
+    allocated_for_this_component_format = "${{{0}.ALLOCATED_PORT}}"
+    allocated_for_any = ".ALLOCATED_PORT}"
+
+    port_allocation_req = allocated_for_this_component_format.format(component)
     if 'configurations' in command:
       for key in command['configurations']:
         if len(command['configurations'][key]) > 0:
@@ -243,10 +262,12 @@
               value = value.replace("${AGENT_LOG_ROOT}",
                                     self.config.getLogPath())
               if port_allocation_req in value:
-                port = self.allocate_port()
-                value = value.replace(port_allocation_req, str(port))
-                logger.info("Allocated port " + str(port) + " for " + port_allocation_req)
-                allocated_ports[k] = value
+                value = self.allocate_ports(value, port_allocation_req)
+                allocated_ports[key + "." + k] = value
+              elif allocated_for_any in value:
+                ## All unallocated ports should be set to 0
+                logger.info("Assigning port 0 " + "for " + value)
+                value = self.set_all_unallocated_ports(value)
               command['configurations'][key][k] = value
               pass
             pass
@@ -254,13 +275,83 @@
         pass
       pass
 
-    if store_config:
+    if store_command:
       logger.info("Storing applied config: " + pprint.pformat(command['configurations']))
-      self.applied_configs = command['configurations']
+      self.stored_command = command
 
   pass
 
-  def allocate_port(self):
+  """
+  All unallocated ports should be set to 0
+  Look for "${SOME_COMPONENT_NAME.ALLOCATED_PORT}"
+        or "${component.ALLOCATED_PORT}{DEFAULT_port}"
+        or "${component.ALLOCATED_PORT}{DEFAULT_port}{DO_NOT_PROPAGATE}"
+  """
+
+  def set_all_unallocated_ports(self, value):
+    pattern_start = "${"
+    sub_section_start = "}{"
+    pattern_end = "}"
+    index = value.find(pattern_start)
+    while index != -1:
+      replace_index_start = index
+      replace_index_end = value.find(pattern_end, replace_index_start)
+      next_pattern_start = value.find(sub_section_start, replace_index_start)
+      while next_pattern_start == replace_index_end:
+        replace_index_end = value.find(pattern_end, replace_index_end + 1)
+        next_pattern_start = value.find(sub_section_start, next_pattern_start + 1)
+        pass
+
+      value = value[:replace_index_start] + "0" + value[replace_index_end + 1:]
+
+      # look for the next
+      index = value.find(pattern_start)
+
+    return value
+    pass
+
+  """
+  Port allocation can ask for multiple dynamic ports
+  port_req_pattern is of type ${component_name.ALLOCATED_PORT}
+    append {DEFAULT_ and find the default value
+    append {DO_NOT_PROPAGATE} if it exists
+  """
+  def allocate_ports(self, value, port_req_pattern):
+    default_port_pattern = "{DEFAULT_"
+    do_not_propagate_pattern = "{DO_NOT_PROPAGATE}"
+    index = value.find(port_req_pattern)
+    while index != -1:
+      replaced_pattern = port_req_pattern
+      def_port = None
+      if index == value.find(port_req_pattern + default_port_pattern):
+        replaced_pattern = port_req_pattern + default_port_pattern
+        start_index = index + len(replaced_pattern)
+        end_index = value.find("}", start_index)
+        def_port_str = value[start_index:end_index]
+        def_port = int(def_port_str)
+        # default value of 0 means allocate any dynamic port
+        if def_port == 0:
+          def_port = None
+
+        replaced_pattern = replaced_pattern + def_port_str + "}"
+        pass
+      if index == value.find(replaced_pattern + do_not_propagate_pattern):
+        replaced_pattern = replaced_pattern + do_not_propagate_pattern
+        pass
+      port = self.allocate_port(def_port)
+      value = value.replace(replaced_pattern, str(port), 1)
+      logger.info("Allocated port " + str(port) + " for " + replaced_pattern)
+      index = value.find(port_req_pattern)
+      pass
+    return value
+    pass
+
+
+  def allocate_port(self, default_port=None):
+    if default_port != None:
+      if self.is_port_available(default_port):
+        return default_port
+
     MAX_ATTEMPT = 5
     iter = 0
     port = -1
@@ -278,4 +369,14 @@
     logger.info("Allocated dynamic port: " + str(port))
     return port
 
+  def is_port_available(self, port):
+    try:
+      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+      sock.settimeout(0.2)
+      sock.connect(('127.0.0.1', port))
+      sock.close()
+    except:
+      return True
+    return False
+
 
diff --git a/slider-agent/src/main/python/agent/Heartbeat.py b/slider-agent/src/main/python/agent/Heartbeat.py
index 8192348..b107d92 100644
--- a/slider-agent/src/main/python/agent/Heartbeat.py
+++ b/slider-agent/src/main/python/agent/Heartbeat.py
@@ -36,7 +36,8 @@
     self.config = config
     self.reports = []
 
-  def build(self, commandResult, id='-1', componentsMapped=False):
+  def build(self, commandResult, id='-1',
+            componentsMapped=False):
     timestamp = int(time.time() * 1000)
     queueResult = self.actionQueue.result()
     logger.info("Queue result: " + pformat(queueResult))
@@ -55,7 +56,15 @@
     if not self.actionQueue.commandQueue.empty():
       commandsInProgress = True
     if len(queueResult) != 0:
-      heartbeat['reports'] = queueResult['reports']
+      heartbeat['reports'] = []
+      for report in queueResult['reports']:
+        if report['reportResult']:
+          del report['reportResult']
+          heartbeat['reports'].append(report)
+        else:
+          # dropping the result but only recording the status
+          commandResult["commandStatus"] = report["status"]
+          pass
       if len(heartbeat['reports']) > 0:
         # There may be IN_PROGRESS tasks
         commandsInProgress = True
diff --git a/slider-agent/src/main/python/agent/ProcessHelper.py b/slider-agent/src/main/python/agent/ProcessHelper.py
index b6283b0..467c4d8 100644
--- a/slider-agent/src/main/python/agent/ProcessHelper.py
+++ b/slider-agent/src/main/python/agent/ProcessHelper.py
@@ -22,12 +22,13 @@
 import logging
 import traceback
 import sys
+import posixpath
 from shell import getTempFiles
 
 logger = logging.getLogger()
 
 if 'AGENT_WORK_ROOT' in os.environ:
-  pidfile = os.path.join(os.environ['AGENT_WORK_ROOT'], "infra/run/agent.pid")
+  pidfile = os.path.realpath(posixpath.join(os.environ['AGENT_WORK_ROOT'], "infra", "run", "agent.pid"))
 else:
   pidfile = None
 
diff --git a/slider-agent/src/main/python/agent/PythonExecutor.py b/slider-agent/src/main/python/agent/PythonExecutor.py
index 5f29e5e..54ce247 100644
--- a/slider-agent/src/main/python/agent/PythonExecutor.py
+++ b/slider-agent/src/main/python/agent/PythonExecutor.py
@@ -28,6 +28,7 @@
 from Grep import Grep
 import shell
 import sys
+import platform
 import Constants
 
 
@@ -125,6 +126,7 @@
     Creates subprocess with given parameters. This functionality was moved to separate method
     to make possible unit testing
     """
+    close_fds = None if platform.system() == "Windows" else True
     env = os.environ.copy()
     if environment_vars:
       for k, v in environment_vars:
@@ -132,13 +134,14 @@
         env[k] = v
     return subprocess.Popen(command,
                             stdout=tmpout,
-                            stderr=tmperr, close_fds=True, env=env)
+                            stderr=tmperr, close_fds=close_fds, env=env)
 
   def isSuccessfull(self, returncode):
     return not self.python_process_has_been_killed and returncode == 0
 
   def python_command(self, script, script_params):
-    python_binary = sys.executable
+    #we need to manually pass the python executable on windows because sys.executable will return the service wrapper
+    python_binary = os.environ['PYTHON_EXE'] if 'PYTHON_EXE' in os.environ else sys.executable
     python_command = [python_binary, "-S", script] + script_params
     return python_command
 
diff --git a/slider-agent/src/main/python/agent/Register.py b/slider-agent/src/main/python/agent/Register.py
index 7c7ff06..b59154f 100644
--- a/slider-agent/src/main/python/agent/Register.py
+++ b/slider-agent/src/main/python/agent/Register.py
@@ -29,7 +29,7 @@
   def __init__(self, config):
     self.config = config
 
-  def build(self, id='-1'):
+  def build(self, actualState, expectedState, allocated_ports, id='-1'):
     timestamp = int(time.time() * 1000)
 
     version = self.read_agent_version()
@@ -38,7 +38,10 @@
                 'timestamp': timestamp,
                 'hostname': self.config.getLabel(),
                 'publicHostname': hostname.public_hostname(),
-                'agentVersion': version
+                'agentVersion': version,
+                'actualState': actualState,
+                'expectedState': expectedState,
+                'allocatedPorts': allocated_ports
     }
     return register
 
diff --git a/slider-agent/src/main/python/agent/Registry.py b/slider-agent/src/main/python/agent/Registry.py
new file mode 100644
index 0000000..37736fe
--- /dev/null
+++ b/slider-agent/src/main/python/agent/Registry.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+'''
+
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import json
+import logging
+from kazoo.client import KazooClient
+
+logger = logging.getLogger()
+
+class Registry:
+  def __init__(self, zk_quorum, zk_reg_path):
+    self.zk_quorum = zk_quorum
+    self.zk_reg_path = zk_reg_path
+
+  def readAMHostPort(self):
+    amHost = ""
+    amSecuredPort = ""
+    zk = None
+    try:
+      zk = KazooClient(hosts=self.zk_quorum, read_only=True)
+      zk.start()
+      data, stat = zk.get(self.zk_reg_path)
+      logger.debug("Registry Data: %s" % (data.decode("utf-8")))
+      sliderRegistry = json.loads(data)
+      amUrl = sliderRegistry["payload"]["internalView"]["endpoints"]["org.apache.slider.agents"]["address"]
+      amHost = amUrl.split("/")[2].split(":")[0]
+      amSecuredPort = amUrl.split(":")[2].split("/")[0]
+      # the port needs to be utf-8 encoded 
+      amSecuredPort = amSecuredPort.encode('utf8', 'ignore')
+    except Exception:
+      # log and let empty strings be returned
+      logger.error("Could not connect to zk registry at %s in quorum %s" % 
+                   (self.zk_reg_path, self.zk_quorum))
+      pass
+    finally:
+      if not zk == None:
+        zk.stop()
+        zk.close()
+    logger.info("AM Host = %s, AM Secured Port = %s" % (amHost, amSecuredPort))
+    return amHost, amSecuredPort
diff --git a/slider-agent/src/main/python/agent/main.py b/slider-agent/src/main/python/agent/main.py
index 12e07ba..f68db04 100644
--- a/slider-agent/src/main/python/agent/main.py
+++ b/slider-agent/src/main/python/agent/main.py
@@ -26,13 +26,19 @@
 import traceback
 import os
 import time
-import errno
+import platform
+import ConfigParser
 import ProcessHelper
+import errno
+import posixpath
 from Controller import Controller
 from AgentConfig import AgentConfig
 from NetUtil import NetUtil
+from Registry import Registry
+import Constants
 
 logger = logging.getLogger()
+IS_WINDOWS = platform.system() == "Windows"
 formatstr = "%(levelname)s %(asctime)s %(filename)s:%(lineno)d - %(message)s"
 agentPid = os.getpid()
 
@@ -99,12 +105,13 @@
 def bind_signal_handlers():
   signal.signal(signal.SIGINT, signal_handler)
   signal.signal(signal.SIGTERM, signal_handler)
-  signal.signal(signal.SIGUSR1, debug)
+  if platform.system() != "Windows":
+    signal.signal(signal.SIGUSR1, debug)
 
 
 def update_config_from_file(agentConfig):
   try:
-    configFile = os.path.join(agentConfig.getWorkRootPath(), configFileRelPath)
+    configFile = posixpath.join(agentConfig.getWorkRootPath(), configFileRelPath)
     if os.path.exists(configFile):
       agentConfig.setConfig(configFile)
     else:
@@ -136,7 +143,7 @@
 
 def ensure_path_exists(path):
   try:
-    os.makedirs(path)
+    os.makedirs(os.path.realpath(path))
   except OSError as exception:
     if exception.errno != errno.EEXIST:
       raise
@@ -173,46 +180,58 @@
   parser = OptionParser()
   parser.add_option("-v", "--verbose", dest="verbose", help="verbose log output", default=False)
   parser.add_option("-l", "--label", dest="label", help="label of the agent", default=None)
-  parser.add_option("--host", dest="host", help="AppMaster host", default=None)
-  parser.add_option("--port", dest="port", help="AppMaster port", default=None)
-  parser.add_option("--secured_port", dest="secured_port", help="AppMaster 2 Way port", default=None)
+  parser.add_option("--zk-quorum", dest=Constants.ZK_QUORUM, help="Zookeeper Quorum", default=None)
+  parser.add_option("--zk-reg-path", dest=Constants.ZK_REG_PATH, help="Zookeeper Registry Path", default=None)
   parser.add_option("--debug", dest="debug", help="Agent debug hint", default="")
   (options, args) = parser.parse_args()
 
   if not 'AGENT_WORK_ROOT' in os.environ:
-    parser.error("AGENT_WORK_ROOT environment variable must be set.");
+    parser.error("AGENT_WORK_ROOT environment variable must be set.")
   options.root_folder = os.environ['AGENT_WORK_ROOT']
   if not 'AGENT_LOG_ROOT' in os.environ:
-    parser.error("AGENT_LOG_ROOT environment variable must be set.");
+    parser.error("AGENT_LOG_ROOT environment variable must be set.")
   options.log_folder = os.environ['AGENT_LOG_ROOT']
+  all_log_folders = [x.strip() for x in options.log_folder.split(',')]
+  if len(all_log_folders) > 1:
+    options.log_folder = all_log_folders[0]
+
+  # If there are multiple log folders, separated by commas, pick the first one
+
   if not options.label:
     parser.error("label is required.");
 
-  bind_signal_handlers()
+  if not IS_WINDOWS:
+    bind_signal_handlers()
 
   # Check for configuration file.
   agentConfig = AgentConfig(options.root_folder, options.log_folder, options.label)
   update_config_from_file(agentConfig)
 
   # update configurations if needed
-  if options.host:
-      agentConfig.set(AgentConfig.SERVER_SECTION, "hostname", options.host)
+  if options.zk_quorum:
+      agentConfig.set(AgentConfig.SERVER_SECTION, Constants.ZK_QUORUM, options.zk_quorum)
 
-  if options.port:
-      agentConfig.set(AgentConfig.SERVER_SECTION, "port", options.port)
-
-  if options.secured_port:
-      agentConfig.set(AgentConfig.SERVER_SECTION, "secured_port", options.secured_port)
+  if options.zk_reg_path:
+      agentConfig.set(AgentConfig.SERVER_SECTION, Constants.ZK_REG_PATH, options.zk_reg_path)
 
   if options.debug:
     agentConfig.set(AgentConfig.AGENT_SECTION, AgentConfig.APP_DBG_CMD, options.debug)
 
+  # Extract the AM hostname and secured port from ZK registry
+  registry = Registry(options.zk_quorum, options.zk_reg_path)
+  amHost, amSecuredPort = registry.readAMHostPort()
+  if amHost:
+      agentConfig.set(AgentConfig.SERVER_SECTION, "hostname", amHost)
+
+  if amSecuredPort:
+      agentConfig.set(AgentConfig.SERVER_SECTION, "secured_port", amSecuredPort)
+
   # set the security directory to a subdirectory of the run dir
-  secDir = os.path.join(agentConfig.getResolvedPath(AgentConfig.RUN_DIR), "security")
+  secDir = posixpath.join(agentConfig.getResolvedPath(AgentConfig.RUN_DIR), "security")
   logger.info("Security/Keys directory: " + secDir)
   agentConfig.set(AgentConfig.SECURITY_SECTION, "keysdir", secDir)
 
-  logFile = os.path.join(agentConfig.getResolvedPath(AgentConfig.LOG_DIR), logFileName)
+  logFile = posixpath.join(agentConfig.getResolvedPath(AgentConfig.LOG_DIR), logFileName)
 
   perform_prestart_checks(agentConfig)
   ensure_folder_layout(agentConfig)
@@ -226,9 +245,12 @@
   logger.info("Using AGENT_WORK_ROOT = " + options.root_folder)
   logger.info("Using AGENT_LOG_ROOT = " + options.log_folder)
 
+  if len(all_log_folders) > 1:
+    logger.info("Selected log folder from available: " + ",".join(all_log_folders))
+
   server_url = SERVER_STATUS_URL.format(
     agentConfig.get(AgentConfig.SERVER_SECTION, 'hostname'),
-    agentConfig.get(AgentConfig.SERVER_SECTION, 'port'),
+    agentConfig.get(AgentConfig.SERVER_SECTION, 'secured_port'),
     agentConfig.get(AgentConfig.SERVER_SECTION, 'check_path'))
   print("Connecting to the server at " + server_url + "...")
   logger.info('Connecting to the server at: ' + server_url)
diff --git a/slider-agent/src/main/python/agent/shell.py b/slider-agent/src/main/python/agent/shell.py
index d339764..446dde9 100644
--- a/slider-agent/src/main/python/agent/shell.py
+++ b/slider-agent/src/main/python/agent/shell.py
@@ -28,16 +28,19 @@
 import time
 import traceback
 import pprint
+import platform
 
-try:
+if platform.system() != "Windows":
+  try:
     import pwd
-except ImportError:
+  except ImportError:
     import winpwd as pwd
 
 global serverTracker
 serverTracker = {}
 logger = logging.getLogger()
 
+shellRunner = None
 threadLocal = threading.local()
 gracefull_kill_delay = 5 # seconds between SIGTERM and SIGKILL
 tempFiles = [] 
@@ -47,7 +50,51 @@
 def getTempFiles():
   return tempFiles
 
-def kill_process_with_children(parent_pid):
+class _dict_to_object:
+  def __init__(self, entries):
+    self.__dict__.update(entries)
+  def __getitem__(self, item):
+    return self.__dict__[item]
+# windows specific code
+def _kill_process_with_children_windows(parent_pid):
+  shellRunner().run(["taskkill", "/T", "/PID", "{0}".format(parent_pid)])
+
+
+class shellRunnerWindows:
+  # Run any command
+  def run(self, script, user=None):
+    logger.warn("user argument ignored on windows")
+    code = 0
+    if not isinstance(script, list):
+      cmd = " "
+      cmd = cmd.join(script)
+    else:
+      cmd = script
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=False)
+    out, err = p.communicate()
+    code = p.wait()
+    logger.debug("Exitcode for %s is %d" % (cmd, code))
+    return {'exitCode': code, 'output': out, 'error': err}
+
+  def runPowershell(self, file=None, script_block=None, args=[]):
+    logger.warn("user argument ignored on windows")
+    code = 0
+    cmd = None
+    if file:
+      cmd = ['powershell', '-WindowStyle', 'Hidden', '-File', file] + args
+    elif script_block:
+      cmd = ['powershell', '-WindowStyle', 'Hidden', '-Command', script_block] + args
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=False)
+    out, err = p.communicate()
+    code = p.wait()
+    logger.debug("Exitcode for %s is %d" % (cmd, code))
+    return _dict_to_object({'exitCode': code, 'output': out, 'error': err})
+
+
+#linux specific code
+def _kill_process_with_children_linux(parent_pid):
   def kill_tree_function(pid, signal):
     '''
     Kills process tree starting from a given pid.
@@ -57,15 +104,15 @@
     # a given PID and then passes list of "kill -<SIGNAL> PID" commands to 'sh'
     # shell.
     CMD = """ps xf | awk -v PID=""" + str(pid) + \
-        """ ' $1 == PID { P = $1; next } P && /_/ { P = P " " $1;""" + \
-        """K=P } P && !/_/ { P="" }  END { print "kill -""" \
-        + str(signal) + """ "K }' | sh """
+          """ ' $1 == PID { P = $1; next } P && /_/ { P = P " " $1;""" + \
+          """K=P } P && !/_/ { P="" }  END { print "kill -""" \
+          + str(signal) + """ "K }' | sh """
     process = subprocess.Popen(CMD, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, shell=True)
     process.communicate()
-  run_kill_function(kill_tree_function, parent_pid)
+  _run_kill_function(kill_tree_function, parent_pid)
 
-def run_kill_function(kill_function, pid):
+def _run_kill_function(kill_function, pid):
   try:
     kill_function(pid, signal.SIGTERM)
   except Exception, e:
@@ -80,8 +127,43 @@
     logger.error("Failed to send SIGKILL to PID %d. Process exited?" % (pid))
     logger.error("Reported error: " + repr(e))
 
-def changeUid():
+def _changeUid():
   try:
     os.setuid(threadLocal.uid)
   except Exception:
-    logger.warn("can not switch user for running command.")
\ No newline at end of file
+    logger.warn("can not switch user for running command.")
+
+
+class shellRunnerLinux:
+  # Run any command
+  def run(self, script, user=None):
+    try:
+      if user != None:
+        user = pwd.getpwnam(user)[2]
+      else:
+        user = os.getuid()
+      threadLocal.uid = user
+    except Exception:
+      logger.warn("can not switch user for RUN_COMMAND.")
+    code = 0
+    cmd = " "
+    cmd = cmd.join(script)
+    p = subprocess.Popen(cmd, preexec_fn=_changeUid, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=True, close_fds=True)
+    out, err = p.communicate()
+    code = p.wait()
+    logger.debug("Exitcode for %s is %d" % (cmd, code))
+    return {'exitCode': code, 'output': out, 'error': err}
+
+
+def kill_process_with_children(parent_pid):
+  if platform.system() == "Windows":
+    _kill_process_with_children_windows(parent_pid)
+  else:
+    _kill_process_with_children_linux(parent_pid)
+
+
+if platform.system() == "Windows":
+  shellRunner = shellRunnerWindows
+else:
+  shellRunner = shellRunnerLinux
\ No newline at end of file
diff --git a/slider-agent/src/main/python/kazoo/LICENSE b/slider-agent/src/main/python/kazoo/LICENSE
new file mode 100644
index 0000000..68c771a
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/LICENSE
@@ -0,0 +1,176 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
diff --git a/slider-agent/src/main/python/kazoo/__init__.py b/slider-agent/src/main/python/kazoo/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/client.py b/slider-agent/src/main/python/kazoo/client.py
new file mode 100644
index 0000000..a315489
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/client.py
@@ -0,0 +1,1413 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Zookeeper Client"""
+import inspect
+import logging
+import os
+import re
+import warnings
+from collections import defaultdict, deque
+from functools import partial
+from os.path import split
+
+from kazoo.exceptions import (
+    AuthFailedError,
+    ConfigurationError,
+    ConnectionClosedError,
+    ConnectionLoss,
+    NoNodeError,
+    NodeExistsError,
+    SessionExpiredError,
+    WriterNotClosedException,
+)
+from kazoo.handlers.threading import SequentialThreadingHandler
+from kazoo.handlers.utils import capture_exceptions, wrap
+from kazoo.hosts import collect_hosts
+from kazoo.loggingsupport import BLATHER
+from kazoo.protocol.connection import ConnectionHandler
+from kazoo.protocol.paths import normpath
+from kazoo.protocol.paths import _prefix_root
+from kazoo.protocol.serialization import (
+    Auth,
+    CheckVersion,
+    CloseInstance,
+    Create,
+    Delete,
+    Exists,
+    GetChildren,
+    GetChildren2,
+    GetACL,
+    SetACL,
+    GetData,
+    SetData,
+    Sync,
+    Transaction
+)
+from kazoo.protocol.states import KazooState
+from kazoo.protocol.states import KeeperState
+from kazoo.retry import KazooRetry
+from kazoo.security import ACL
+from kazoo.security import OPEN_ACL_UNSAFE
+
+# convenience API
+from kazoo.recipe.barrier import Barrier
+from kazoo.recipe.barrier import DoubleBarrier
+from kazoo.recipe.counter import Counter
+from kazoo.recipe.election import Election
+from kazoo.recipe.lock import Lock
+from kazoo.recipe.lock import Semaphore
+from kazoo.recipe.partitioner import SetPartitioner
+from kazoo.recipe.party import Party
+from kazoo.recipe.party import ShallowParty
+from kazoo.recipe.queue import Queue
+from kazoo.recipe.queue import LockingQueue
+from kazoo.recipe.watchers import ChildrenWatch
+from kazoo.recipe.watchers import DataWatch
+
+try:  # pragma: nocover
+    basestring
+except NameError:  # pragma: nocover
+    # Python 3 has no basestring; alias it to str for the isinstance
+    # checks below (so bytes are rejected where strings are required).
+    basestring = str
+
+# KeeperState values from which the session cannot recover; used by
+# _session_callback to decide between SUSPENDED and LOST transitions.
+LOST_STATES = (KeeperState.EXPIRED_SESSION, KeeperState.AUTH_FAILED,
+               KeeperState.CLOSED)
+# Extracts the dotted version number from the 'envi' admin command
+# output (see server_version()).
+ENVI_VERSION = re.compile('[\w\s:.]*=([\d\.]*).*', re.DOTALL)
+log = logging.getLogger(__name__)
+
+
+# Deprecated per-kwarg retry options accepted by KazooClient.__init__
+# (with their defaults), and the mapping onto KazooRetry parameter names.
+_RETRY_COMPAT_DEFAULTS = dict(
+    max_retries=None,
+    retry_delay=0.1,
+    retry_backoff=2,
+    retry_jitter=0.8,
+    retry_max_delay=3600,
+)
+
+_RETRY_COMPAT_MAPPING = dict(
+    max_retries='max_tries',
+    retry_delay='delay',
+    retry_backoff='backoff',
+    retry_jitter='max_jitter',
+    retry_max_delay='max_delay',
+)
+
+
+class KazooClient(object):
+    """An Apache Zookeeper Python client supporting alternate callback
+    handlers and high-level functionality.
+
+    Watch functions registered with this class will not get session
+    events, unlike the default Zookeeper watches. They will also be
+    called with a single argument, a
+    :class:`~kazoo.protocol.states.WatchedEvent` instance.
+
+    """
+    def __init__(self, hosts='127.0.0.1:2181',
+                 timeout=10.0, client_id=None, handler=None,
+                 default_acl=None, auth_data=None, read_only=None,
+                 randomize_hosts=True, connection_retry=None,
+                 command_retry=None, logger=None, **kwargs):
+        """Create a :class:`KazooClient` instance. All time arguments
+        are in seconds.
+
+        :param hosts: Comma-separated list of hosts to connect to
+                      (e.g. 127.0.0.1:2181,127.0.0.1:2182,[::1]:2183).
+        :param timeout: The longest to wait for a Zookeeper connection.
+        :param client_id: A Zookeeper client id, used when
+                          re-establishing a prior session connection.
+        :param handler: An instance of a class implementing the
+                        :class:`~kazoo.interfaces.IHandler` interface
+                        for callback handling.
+        :param default_acl: A default ACL used on node creation.
+        :param auth_data:
+            A list of authentication credentials to use for the
+            connection. Should be a list of (scheme, credential)
+            tuples as :meth:`add_auth` takes.
+        :param read_only: Allow connections to read only servers.
+        :param randomize_hosts: By default randomize host selection.
+        :param connection_retry:
+            A :class:`kazoo.retry.KazooRetry` object to use for
+            retrying the connection to Zookeeper. Also can be a dict of
+            options which will be used for creating one.
+        :param command_retry:
+            A :class:`kazoo.retry.KazooRetry` object to use for
+            the :meth:`KazooClient.retry` method. Also can be a dict of
+            options which will be used for creating one.
+        :param logger: A custom logger to use instead of the module
+            global `log` instance.
+
+        Basic Example:
+
+        .. code-block:: python
+
+            zk = KazooClient()
+            zk.start()
+            children = zk.get_children('/')
+            zk.stop()
+
+        As a convenience all recipe classes are available as attributes
+        and get automatically bound to the client. For example::
+
+            zk = KazooClient()
+            zk.start()
+            lock = zk.Lock('/lock_path')
+
+        .. versionadded:: 0.6
+            The read_only option. Requires Zookeeper 3.4+
+
+        .. versionadded:: 0.6
+            The retry_max_delay option.
+
+        .. versionadded:: 0.6
+            The randomize_hosts option.
+
+        .. versionchanged:: 0.8
+            Removed the unused watcher argument (was second argument).
+
+        .. versionadded:: 1.2
+            The connection_retry, command_retry and logger options.
+
+        """
+        self.logger = logger or log
+
+        # Record the handler strategy used
+        self.handler = handler if handler else SequentialThreadingHandler()
+        if inspect.isclass(self.handler):
+            raise ConfigurationError("Handler must be an instance of a class, "
+                                     "not the class: %s" % self.handler)
+
+        # (scheme, credential) tuples, re-sent on reconnect (see
+        # add_auth_async, which adds to this set).
+        self.auth_data = auth_data if auth_data else set([])
+        self.default_acl = default_acl
+        self.randomize_hosts = randomize_hosts
+        self.hosts = None
+        self.chroot = None
+        self.set_hosts(hosts)
+
+        # Curator like simplified state tracking, and listeners for
+        # state transitions
+        self._state = KeeperState.CLOSED
+        self.state = KazooState.LOST
+        self.state_listeners = set()
+
+        self._reset()
+        self.read_only = read_only
+
+        if client_id:
+            self._session_id = client_id[0]
+            self._session_passwd = client_id[1]
+        else:
+            self._reset_session()
+
+        # ZK uses milliseconds
+        self._session_timeout = int(timeout * 1000)
+
+        # We use events like twitter's client to track current and
+        # desired state (connected, and whether to shutdown)
+        self._live = self.handler.event_object()
+        self._writer_stopped = self.handler.event_object()
+        self._stopped = self.handler.event_object()
+        self._stopped.set()
+        self._writer_stopped.set()
+
+        # Build the retry helpers: accept either ready-made KazooRetry
+        # instances or dicts of KazooRetry constructor options.
+        self.retry = self._conn_retry = None
+
+        if type(connection_retry) is dict:
+            self._conn_retry = KazooRetry(**connection_retry)
+        elif type(connection_retry) is KazooRetry:
+            self._conn_retry = connection_retry
+
+        if type(command_retry) is dict:
+            self.retry = KazooRetry(**command_retry)
+        elif type(command_retry) is KazooRetry:
+            self.retry = command_retry
+
+
+        if type(self._conn_retry) is KazooRetry:
+            if self.handler.sleep_func != self._conn_retry.sleep_func:
+                raise ConfigurationError("Retry handler and event handler "
+                                         " must use the same sleep func")
+
+        if type(self.retry) is KazooRetry:
+            if self.handler.sleep_func != self.retry.sleep_func:
+                raise ConfigurationError("Command retry handler and event "
+                                         "handler must use the same sleep func")
+
+        # Legacy path: translate deprecated per-kwarg retry options into
+        # KazooRetry arguments for whichever retry objects weren't given.
+        if self.retry is None or self._conn_retry is None:
+            old_retry_keys = dict(_RETRY_COMPAT_DEFAULTS)
+            for key in old_retry_keys:
+                try:
+                    old_retry_keys[key] = kwargs.pop(key)
+                    warnings.warn('Passing retry configuration param %s to the'
+                            ' client directly is deprecated, please pass a'
+                            ' configured retry object (using param %s)' % (
+                                key, _RETRY_COMPAT_MAPPING[key]),
+                            DeprecationWarning, stacklevel=2)
+                except KeyError:
+                    pass
+
+            retry_keys = {}
+            for oldname, value in old_retry_keys.items():
+                retry_keys[_RETRY_COMPAT_MAPPING[oldname]] = value
+
+            if self._conn_retry is None:
+                self._conn_retry = KazooRetry(
+                    sleep_func=self.handler.sleep_func,
+                    **retry_keys)
+            if self.retry is None:
+                self.retry = KazooRetry(
+                    sleep_func=self.handler.sleep_func,
+                    **retry_keys)
+
+        self._conn_retry.interrupt = lambda: self._stopped.is_set()
+        self._connection = ConnectionHandler(self, self._conn_retry.copy(),
+            logger=self.logger)
+
+        # Every retry call should have its own copy of the retry helper
+        # to avoid shared retry counts
+        self._retry = self.retry
+        def _retry(*args, **kwargs):
+            return self._retry.copy()(*args, **kwargs)
+        self.retry = _retry
+
+        # Bind the recipe classes to this client instance.
+        self.Barrier = partial(Barrier, self)
+        self.Counter = partial(Counter, self)
+        self.DoubleBarrier = partial(DoubleBarrier, self)
+        self.ChildrenWatch = partial(ChildrenWatch, self)
+        self.DataWatch = partial(DataWatch, self)
+        self.Election = partial(Election, self)
+        self.Lock = partial(Lock, self)
+        self.Party = partial(Party, self)
+        self.Queue = partial(Queue, self)
+        self.LockingQueue = partial(LockingQueue, self)
+        self.SetPartitioner = partial(SetPartitioner, self)
+        self.Semaphore = partial(Semaphore, self)
+        self.ShallowParty = partial(ShallowParty, self)
+
+        # If we got any unhandled keywords, complain like python would
+        if kwargs:
+            raise TypeError('__init__() got unexpected keyword arguments: %s'
+                            % (kwargs.keys(),))
+
+    def _reset(self):
+        """Resets a variety of client states for a new connection."""
+        # Outbound request queue and the queue of requests awaiting
+        # server replies (see _call and _notify_pending).
+        self._queue = deque()
+        self._pending = deque()
+
+        self._reset_watchers()
+        self._reset_session()
+        # Presumably the last transaction id seen from the server;
+        # 0 marks a fresh session with no history — TODO confirm.
+        self.last_zxid = 0
+        self._protocol_version = None
+
+    def _reset_watchers(self):
+        # Watch callback registries (presumably keyed by znode path);
+        # cleared on connection loss by _session_callback.
+        self._child_watchers = defaultdict(set)
+        self._data_watchers = defaultdict(set)
+
+    def _reset_session(self):
+        # No session id, and a 16-byte all-zero session password.
+        self._session_id = None
+        self._session_passwd = b'\x00' * 16
+
+    @property
+    def client_state(self):
+        """Returns the last Zookeeper client state
+
+        This is the non-simplified state information and is generally
+        not as useful as the simplified KazooState information.
+
+        """
+        # Raw KeeperState value, as opposed to the simplified self.state.
+        return self._state
+
+    @property
+    def client_id(self):
+        """Returns the client id for this Zookeeper session if
+        connected.
+
+        :returns: client id which consists of the session id and
+                  password.
+        :rtype: tuple
+        """
+        # _live is only set while a connection is established
+        # (see _session_callback); otherwise there is no valid id.
+        if self._live.is_set():
+            return (self._session_id, self._session_passwd)
+        return None
+
+    @property
+    def connected(self):
+        """Returns whether the Zookeeper connection has been
+        established."""
+        # _live is set/cleared by _session_callback on state changes.
+        return self._live.is_set()
+
+    def set_hosts(self, hosts, randomize_hosts=None):
+        """ sets the list of hosts used by this client.
+
+        This function accepts the same format hosts parameter as the init
+        function and sets the client to use the new hosts the next time it
+        needs to look up a set of hosts. This function does not affect the
+        current connected status.
+
+        It is not currently possible to change the chroot with this function,
+        setting a host list with a new chroot will raise a ConfigurationError.
+
+        :param hosts: see description in :meth:`KazooClient.__init__`
+        :param randomize_hosts: override client default for host randomization
+        :raises:
+            :exc:`ConfigurationError` if the hosts argument changes the chroot
+
+        .. versionadded:: 1.4
+
+        .. warning::
+
+            Using this function to point a client to a completely disparate
+            zookeeper server cluster has undefined behavior.
+
+        """
+
+        if randomize_hosts is None:
+            randomize_hosts = self.randomize_hosts
+
+        self.hosts, chroot = collect_hosts(hosts, randomize_hosts)
+
+        if chroot:
+            new_chroot = normpath(chroot)
+        else:
+            new_chroot = ''
+
+        # self.chroot is None only before the first call (from __init__);
+        # after that, any chroot change is rejected.
+        if self.chroot is not None and new_chroot != self.chroot:
+            raise ConfigurationError("Changing chroot at runtime is not "
+                                     "currently supported")
+
+        self.chroot = new_chroot
+
+    def add_listener(self, listener):
+        """Add a function to be called for connection state changes.
+
+        This function will be called with a
+        :class:`~kazoo.protocol.states.KazooState` instance indicating
+        the new connection state on state transitions.
+
+        .. warning::
+
+            This function must not block. If its at all likely that it
+            might need data or a value that could result in blocking
+            than the :meth:`~kazoo.interfaces.IHandler.spawn` method
+            should be used so that the listener can return immediately.
+
+        """
+        if not (listener and callable(listener)):
+            raise ConfigurationError("listener must be callable")
+        # Listeners live in a set, so adding one twice is a no-op.
+        self.state_listeners.add(listener)
+
+    def remove_listener(self, listener):
+        """Remove a listener function"""
+        # discard() (not remove()) so unknown listeners are ignored.
+        self.state_listeners.discard(listener)
+
+    def _make_state_change(self, state):
+        # Transition the simplified KazooState and notify listeners.
+        # skip if state is current
+        if self.state == state:
+            return
+
+        self.state = state
+
+        # Create copy of listeners for iteration in case one needs to
+        # remove itself
+        for listener in list(self.state_listeners):
+            try:
+                # A listener may return True to unsubscribe itself.
+                remove = listener(state)
+                if remove is True:
+                    self.remove_listener(listener)
+            except Exception:
+                self.logger.exception("Error in connection state listener")
+
+    def _session_callback(self, state):
+        # Translate raw KeeperState changes into simplified KazooState
+        # transitions and the corresponding cleanup.
+        if state == self._state:
+            return
+
+        # Note that we don't check self.state == LOST since that's also
+        # the client's initial state
+        dead_state = self._state in LOST_STATES
+        self._state = state
+
+        # If we were previously closed or had an expired session, and
+        # are now connecting, don't bother with the rest of the
+        # transitions since they only apply after
+        # we've established a connection
+        if dead_state and state == KeeperState.CONNECTING:
+            self.logger.log(BLATHER, "Skipping state change")
+            return
+
+        if state in (KeeperState.CONNECTED, KeeperState.CONNECTED_RO):
+            self.logger.info("Zookeeper connection established, state: %s", state)
+            self._live.set()
+            self._make_state_change(KazooState.CONNECTED)
+        elif state in LOST_STATES:
+            # Unrecoverable: fail all waiters and reset client state.
+            self.logger.info("Zookeeper session lost, state: %s", state)
+            self._live.clear()
+            self._make_state_change(KazooState.LOST)
+            self._notify_pending(state)
+            self._reset()
+        else:
+            self.logger.info("Zookeeper connection lost")
+            # Connection lost (but session may survive): fail waiters,
+            # drop registered watches, and go SUSPENDED.
+            self._live.clear()
+            self._notify_pending(state)
+            self._make_state_change(KazooState.SUSPENDED)
+            self._reset_watchers()
+
+    def _notify_pending(self, state):
+        """Used to clear a pending response queue and request queue
+        during connection drops."""
+        # Choose the exception matching the reason for the drop.
+        if state == KeeperState.AUTH_FAILED:
+            exc = AuthFailedError()
+        elif state == KeeperState.EXPIRED_SESSION:
+            exc = SessionExpiredError()
+        else:
+            exc = ConnectionLoss()
+
+        # Drain both queues, failing every waiter; popleft() raising
+        # IndexError signals the queue is empty.
+        while True:
+            try:
+                request, async_object, xid = self._pending.popleft()
+                if async_object:
+                    async_object.set_exception(exc)
+            except IndexError:
+                break
+
+        while True:
+            try:
+                request, async_object = self._queue.popleft()
+                if async_object:
+                    async_object.set_exception(exc)
+            except IndexError:
+                break
+
+    def _safe_close(self):
+        # Stop the handler, then give the connection up to the session
+        # timeout (floored at 10 seconds) to shut its writer down.
+        self.handler.stop()
+        timeout = self._session_timeout // 1000
+        if timeout < 10:
+            timeout = 10
+        if not self._connection.stop(timeout):
+            raise WriterNotClosedException(
+                "Writer still open from prior connection "
+                "and wouldn't close after %s seconds" % timeout)
+
+    def _call(self, request, async_object):
+        """Ensure there's an active connection and put the request in
+        the queue if there is.
+
+        Returns False if the call short circuits due to AUTH_FAILED,
+        CLOSED, EXPIRED_SESSION or CONNECTING state.
+
+        """
+
+        if self._state == KeeperState.AUTH_FAILED:
+            async_object.set_exception(AuthFailedError())
+            return False
+        elif self._state == KeeperState.CLOSED:
+            async_object.set_exception(ConnectionClosedError(
+                "Connection has been closed"))
+            return False
+        elif self._state in (KeeperState.EXPIRED_SESSION,
+                             KeeperState.CONNECTING):
+            async_object.set_exception(SessionExpiredError())
+            return False
+
+        self._queue.append((request, async_object))
+
+        # wake the connection, guarding against a race with close()
+        write_pipe = self._connection._write_pipe
+        if write_pipe is None:
+            async_object.set_exception(ConnectionClosedError(
+                "Connection has been closed"))
+        try:
+            os.write(write_pipe, b'\0')
+        except:
+            async_object.set_exception(ConnectionClosedError(
+                "Connection has been closed"))
+
+    def start(self, timeout=15):
+        """Initiate connection to ZK.
+
+        :param timeout: Time in seconds to wait for connection to
+                        succeed.
+        :raises: :attr:`~kazoo.interfaces.IHandler.timeout_exception`
+                 if the connection wasn't established within `timeout`
+                 seconds.
+
+        """
+        event = self.start_async()
+        event.wait(timeout=timeout)
+        if not self.connected:
+            # We time-out, ensure we are disconnected
+            self.stop()
+            raise self.handler.timeout_exception("Connection time-out")
+
+        # Warn early when a configured chroot doesn't exist on the server.
+        if self.chroot and not self.exists("/"):
+            warnings.warn("No chroot path exists, the chroot path "
+                          "should be created before normal use.")
+
+    def start_async(self):
+        """Asynchronously initiate connection to ZK.
+
+        :returns: An event object that can be checked to see if the
+                  connection is alive.
+        :rtype: :class:`~threading.Event` compatible object.
+
+        """
+        # If we're already connected, ignore
+        if self._live.is_set():
+            return self._live
+
+        # Make sure we're safely closed
+        self._safe_close()
+
+        # We've been asked to connect, clear the stop and our writer
+        # thread indicator
+        self._stopped.clear()
+        self._writer_stopped.clear()
+
+        # Start the handler
+        self.handler.start()
+
+        # Start the connection
+        self._connection.start()
+        return self._live
+
+    def stop(self):
+        """Gracefully stop this Zookeeper session.
+
+        This method can be called while a reconnection attempt is in
+        progress, which will then be halted.
+
+        Once the connection is closed, its session becomes invalid. All
+        the ephemeral nodes in the ZooKeeper server associated with the
+        session will be removed. The watches left on those nodes (and
+        on their parents) will be triggered.
+
+        """
+        if self._stopped.is_set():
+            return
+
+        self._stopped.set()
+        # Queue a close marker and wake the connection thread.
+        # NOTE(review): unlike _call, this writes to _write_pipe without
+        # a None guard — confirm the pipe cannot be closed concurrently
+        # here.
+        self._queue.append((CloseInstance, None))
+        os.write(self._connection._write_pipe, b'\0')
+        self._safe_close()
+
+    def restart(self):
+        """Stop and restart the Zookeeper session."""
+        self.stop()
+        # start() is used with its default 15 second timeout.
+        self.start()
+
+    def close(self):
+        """Free any resources held by the client.
+
+        This method should be called on a stopped client before it is
+        discarded. Not doing so may result in filehandles being leaked.
+
+        .. versionadded:: 1.0
+        """
+        # Delegates cleanup to the connection handler.
+        self._connection.close()
+
+    def command(self, cmd=b'ruok'):
+        """Sent a management command to the current ZK server.
+
+        Examples are `ruok`, `envi` or `stat`.
+
+        :returns: An unstructured textual response.
+        :rtype: str
+
+        :raises:
+            :exc:`ConnectionLoss` if there is no connection open, or
+            possibly a :exc:`socket.error` if there's a problem with
+            the connection used just for this command.
+
+        .. versionadded:: 0.5
+
+        """
+        if not self._live.is_set():
+            raise ConnectionLoss("No connection to server")
+
+        # Admin commands go over a fresh, short-lived socket to the same
+        # server as the session connection, not the session socket.
+        peer = self._connection._socket.getpeername()
+        sock = self.handler.create_connection(
+            peer, timeout=self._session_timeout / 1000.0)
+        sock.sendall(cmd)
+        # Single read of up to 8 KiB; the response is then decoded
+        # leniently (undecodable bytes are replaced).
+        result = sock.recv(8192)
+        sock.close()
+        return result.decode('utf-8', 'replace')
+
+    def server_version(self):
+        """Get the version of the currently connected ZK server.
+
+        :returns: The server version, for example (3, 4, 3).
+        :rtype: tuple
+
+        .. versionadded:: 0.5
+
+        """
+        # Parse the dotted version out of the 'envi' admin response
+        # using the module-level ENVI_VERSION pattern.
+        data = self.command(b'envi')
+        string = ENVI_VERSION.match(data).group(1)
+        return tuple([int(i) for i in string.split('.')])
+
+    def add_auth(self, scheme, credential):
+        """Send credentials to server.
+
+        :param scheme: authentication scheme (default supported:
+                       "digest").
+        :param credential: the credential -- value depends on scheme.
+
+        :returns: True if it was successful.
+        :rtype: bool
+
+        :raises:
+            :exc:`~kazoo.exceptions.AuthFailedError` if it failed though
+            the session state will be set to AUTH_FAILED as well.
+
+        """
+        # Synchronous wrapper: block on the async result.
+        return self.add_auth_async(scheme, credential).get()
+
+    def add_auth_async(self, scheme, credential):
+        """Asynchronously send credentials to server. Takes the same
+        arguments as :meth:`add_auth`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        # Both values must be strings (bytes are rejected on Python 3,
+        # where basestring is aliased to str).
+        if not isinstance(scheme, basestring):
+            raise TypeError("Invalid type for scheme")
+        if not isinstance(credential, basestring):
+            raise TypeError("Invalid type for credential")
+
+        # we need this auth data to re-authenticate on reconnect
+        self.auth_data.add((scheme, credential))
+
+        async_result = self.handler.async_result()
+        self._call(Auth(0, scheme, credential), async_result)
+        return async_result
+
+    def unchroot(self, path):
+        """Strip the chroot if applicable from the path."""
+        if not self.chroot:
+            return path
+
+        # Paths outside the chroot are returned unchanged.
+        if path.startswith(self.chroot):
+            return path[len(self.chroot):]
+        else:
+            return path
+
+    def sync_async(self, path):
+        """Asynchronous sync.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        async_result = self.handler.async_result()
+        # _prefix_root joins the client chroot onto the request path.
+        self._call(Sync(_prefix_root(self.chroot, path)), async_result)
+        return async_result
+
+    def sync(self, path):
+        """Sync, blocks until response is acknowledged.
+
+        Flushes channel between process and leader.
+
+        :param path: path of node.
+        :returns: The node path that was synced.
+        :raises:
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        .. versionadded:: 0.5
+
+        """
+        # Synchronous wrapper: block on the async result.
+        return self.sync_async(path).get()
+
+    def create(self, path, value=b"", acl=None, ephemeral=False,
+               sequence=False, makepath=False):
+        """Create a node with the given value as its data. Optionally
+        set an ACL on the node.
+
+        The ephemeral and sequence arguments determine the type of the
+        node.
+
+        An ephemeral node will be automatically removed by ZooKeeper
+        when the session associated with the creation of the node
+        expires.
+
+        A sequential node will be given the specified path plus a
+        suffix `i` where i is the current sequential number of the
+        node. The sequence number is always fixed length of 10 digits,
+        0 padded. Once such a node is created, the sequential number
+        will be incremented by one.
+
+        If a node with the same actual path already exists in
+        ZooKeeper, a NodeExistsError will be raised. Note that since a
+        different actual path is used for each invocation of creating
+        sequential nodes with the same path argument, the call will
+        never raise NodeExistsError.
+
+        If the parent node does not exist in ZooKeeper, a NoNodeError
+        will be raised. Setting the optional `makepath` argument to
+        `True` will create all missing parent nodes instead.
+
+        An ephemeral node cannot have children. If the parent node of
+        the given path is ephemeral, a NoChildrenForEphemeralsError
+        will be raised.
+
+        This operation, if successful, will trigger all the watches
+        left on the node of the given path by :meth:`exists` and
+        :meth:`get` API calls, and the watches left on the parent node
+        by :meth:`get_children` API calls.
+
+        The maximum allowable size of the node value is 1 MB. Values
+        larger than this will cause a ZookeeperError to be raised.
+
+        :param path: Path of node.
+        :param value: Initial bytes value of node.
+        :param acl: :class:`~kazoo.security.ACL` list.
+        :param ephemeral: Boolean indicating whether node is ephemeral
+                          (tied to this session).
+        :param sequence: Boolean indicating whether path is suffixed
+                         with a unique index.
+        :param makepath: Whether the path should be created if it
+                         doesn't exist.
+        :returns: Real path of the new node.
+        :rtype: str
+
+        :raises:
+            :exc:`~kazoo.exceptions.NodeExistsError` if the node
+            already exists.
+
+            :exc:`~kazoo.exceptions.NoNodeError` if parent nodes are
+            missing.
+
+            :exc:`~kazoo.exceptions.NoChildrenForEphemeralsError` if
+            the parent node is an ephemeral node.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the provided
+            value is too large.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        """
+        # A falsy acl (None or empty) falls back to the client default,
+        # then the call blocks on the async variant.
+        acl = acl or self.default_acl
+        return self.create_async(path, value, acl=acl, ephemeral=ephemeral,
+            sequence=sequence, makepath=makepath).get()
+
+    def create_async(self, path, value=b"", acl=None, ephemeral=False,
+                     sequence=False, makepath=False):
+        """Asynchronously create a ZNode. Takes the same arguments as
+        :meth:`create`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        .. versionadded:: 1.1
+            The makepath option.
+
+        """
+        if acl is None and self.default_acl:
+            acl = self.default_acl
+
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if acl and (isinstance(acl, ACL) or
+                    not isinstance(acl, (tuple, list))):
+            raise TypeError("acl must be a tuple/list of ACL's")
+        if value is not None and not isinstance(value, bytes):
+            raise TypeError("value must be a byte string")
+        if not isinstance(ephemeral, bool):
+            raise TypeError("ephemeral must be a bool")
+        if not isinstance(sequence, bool):
+            raise TypeError("sequence must be a bool")
+        if not isinstance(makepath, bool):
+            raise TypeError("makepath must be a bool")
+
+        flags = 0
+        if ephemeral:
+            flags |= 1
+        if sequence:
+            flags |= 2
+        if acl is None:
+            acl = OPEN_ACL_UNSAFE
+
+        async_result = self.handler.async_result()
+
+        @capture_exceptions(async_result)
+        def do_create():
+            result = self._create_async_inner(path, value, acl, flags, trailing=sequence)
+            result.rawlink(create_completion)
+
+        @capture_exceptions(async_result)
+        def retry_completion(result):
+            result.get()
+            do_create()
+
+        @wrap(async_result)
+        def create_completion(result):
+            try:
+                return self.unchroot(result.get())
+            except NoNodeError:
+                if not makepath:
+                    raise
+                if sequence and path.endswith('/'):
+                    parent = path.rstrip('/')
+                else:
+                    parent, _ = split(path)
+                self.ensure_path_async(parent, acl).rawlink(retry_completion)
+
+        do_create()
+        return async_result
+
+    def _create_async_inner(self, path, value, acl, flags, trailing=False):
+        async_result = self.handler.async_result()
+        call_result = self._call(
+            Create(_prefix_root(self.chroot, path, trailing=trailing),
+                   value, acl, flags), async_result)
+        if call_result is False:
+            # We hit a short-circuit exit on the _call. Because we are
+            # not using the original async_result here, we bubble the
+            # exception upwards to the do_create function in
+            # KazooClient.create so that it gets set on the correct
+            # async_result object
+            raise async_result.exception
+        return async_result
+
+    def ensure_path(self, path, acl=None):
+        """Recursively create a path if it doesn't exist.
+
+        :param path: Path of node.
+        :param acl: Permissions for node.
+
+        """
+        return self.ensure_path_async(path, acl).get()
+
+    def ensure_path_async(self, path, acl=None):
+        """Recursively create a path asynchronously if it doesn't
+        exist. Takes the same arguments as :meth:`ensure_path`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        .. versionadded:: 1.1
+
+        """
+        acl = acl or self.default_acl
+        async_result = self.handler.async_result()
+
+        @wrap(async_result)
+        def create_completion(result):
+            try:
+                return result.get()
+            except NodeExistsError:
+                return True
+
+        @capture_exceptions(async_result)
+        def prepare_completion(next_path, result):
+            result.get()
+            self.create_async(next_path, acl=acl).rawlink(create_completion)
+
+        @wrap(async_result)
+        def exists_completion(path, result):
+            if result.get():
+                return True
+            parent, node = split(path)
+            if node:
+                self.ensure_path_async(parent, acl=acl).rawlink(
+                    partial(prepare_completion, path))
+            else:
+                self.create_async(path, acl=acl).rawlink(create_completion)
+
+        self.exists_async(path).rawlink(partial(exists_completion, path))
+
+        return async_result
+
+    def exists(self, path, watch=None):
+        """Check if a node exists.
+
+        If a watch is provided, it will be left on the node with the
+        given path. The watch will be triggered by a successful
+        operation that creates/deletes the node or sets the data on the
+        node.
+
+        :param path: Path of node.
+        :param watch: Optional watch callback to set for future changes
+                      to this path.
+        :returns: ZnodeStat of the node if it exists, else None if the
+                  node does not exist.
+        :rtype: :class:`~kazoo.protocol.states.ZnodeStat` or `None`.
+
+        :raises:
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        """
+        return self.exists_async(path, watch).get()
+
+    def exists_async(self, path, watch=None):
+        """Asynchronously check if a node exists. Takes the same
+        arguments as :meth:`exists`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if watch and not callable(watch):
+            raise TypeError("watch must be a callable")
+
+        async_result = self.handler.async_result()
+        self._call(Exists(_prefix_root(self.chroot, path), watch),
+                   async_result)
+        return async_result
+
+    def get(self, path, watch=None):
+        """Get the value of a node.
+
+        If a watch is provided, it will be left on the node with the
+        given path. The watch will be triggered by a successful
+        operation that sets data on the node, or deletes the node.
+
+        :param path: Path of node.
+        :param watch: Optional watch callback to set for future changes
+                      to this path.
+        :returns:
+            Tuple (value, :class:`~kazoo.protocol.states.ZnodeStat`) of
+            node.
+        :rtype: tuple
+
+        :raises:
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code
+
+        """
+        return self.get_async(path, watch).get()
+
+    def get_async(self, path, watch=None):
+        """Asynchronously get the value of a node. Takes the same
+        arguments as :meth:`get`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if watch and not callable(watch):
+            raise TypeError("watch must be a callable")
+
+        async_result = self.handler.async_result()
+        self._call(GetData(_prefix_root(self.chroot, path), watch),
+                   async_result)
+        return async_result
+
+    def get_children(self, path, watch=None, include_data=False):
+        """Get a list of child nodes of a path.
+
+        If a watch is provided it will be left on the node with the
+        given path. The watch will be triggered by a successful
+        operation that deletes the node of the given path or
+        creates/deletes a child under the node.
+
+        The list of children returned is not sorted and no guarantee is
+        provided as to its natural or lexical order.
+
+        :param path: Path of node to list.
+        :param watch: Optional watch callback to set for future changes
+                      to this path.
+        :param include_data:
+            Include the :class:`~kazoo.protocol.states.ZnodeStat` of
+            the node in addition to the children. This option changes
+            the return value to be a tuple of (children, stat).
+
+        :returns: List of child node names, or tuple if `include_data`
+                  is `True`.
+        :rtype: list
+
+        :raises:
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        .. versionadded:: 0.5
+            The `include_data` option.
+
+        """
+        return self.get_children_async(path, watch, include_data).get()
+
+    def get_children_async(self, path, watch=None, include_data=False):
+        """Asynchronously get a list of child nodes of a path. Takes
+        the same arguments as :meth:`get_children`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if watch and not callable(watch):
+            raise TypeError("watch must be a callable")
+        if not isinstance(include_data, bool):
+            raise TypeError("include_data must be a bool")
+
+        async_result = self.handler.async_result()
+        if include_data:
+            req = GetChildren2(_prefix_root(self.chroot, path), watch)
+        else:
+            req = GetChildren(_prefix_root(self.chroot, path), watch)
+        self._call(req, async_result)
+        return async_result
+
+    def get_acls(self, path):
+        """Return the ACL and stat of the node of the given path.
+
+        :param path: Path of the node.
+        :returns: The ACL array of the given node and its
+            :class:`~kazoo.protocol.states.ZnodeStat`.
+        :rtype: tuple of (:class:`~kazoo.security.ACL` list,
+                :class:`~kazoo.protocol.states.ZnodeStat`)
+        :raises:
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code
+
+        .. versionadded:: 0.5
+
+        """
+        return self.get_acls_async(path).get()
+
+    def get_acls_async(self, path):
+        """Return the ACL and stat of the node of the given path. Takes
+        the same arguments as :meth:`get_acls`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+
+        async_result = self.handler.async_result()
+        self._call(GetACL(_prefix_root(self.chroot, path)), async_result)
+        return async_result
+
+    def set_acls(self, path, acls, version=-1):
+        """Set the ACL for the node of the given path.
+
+        Set the ACL for the node of the given path if such a node
+        exists and the given version matches the version of the node.
+
+        :param path: Path for the node.
+        :param acls: List of :class:`~kazoo.security.ACL` objects to
+                     set.
+        :param version: The expected node version that must match.
+        :returns: The stat of the node.
+        :raises:
+            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
+            match.
+
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist.
+
+            :exc:`~kazoo.exceptions.InvalidACLError` if the ACL is
+            invalid.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        .. versionadded:: 0.5
+
+        """
+        return self.set_acls_async(path, acls, version).get()
+
+    def set_acls_async(self, path, acls, version=-1):
+        """Set the ACL for the node of the given path. Takes the same
+        arguments as :meth:`set_acls`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if isinstance(acls, ACL) or not isinstance(acls, (tuple, list)):
+            raise TypeError("acl must be a tuple/list of ACL's")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+
+        async_result = self.handler.async_result()
+        self._call(SetACL(_prefix_root(self.chroot, path), acls, version),
+                   async_result)
+        return async_result
+
+    def set(self, path, value, version=-1):
+        """Set the value of a node.
+
+        If the version of the node being updated is newer than the
+        supplied version (and the supplied version is not -1), a
+        BadVersionError will be raised.
+
+        This operation, if successful, will trigger all the watches on
+        the node of the given path left by :meth:`get` API calls.
+
+        The maximum allowable size of the value is 1 MB. Values larger
+        than this will cause a ZookeeperError to be raised.
+
+        :param path: Path of node.
+        :param value: New data value.
+        :param version: Version of node being updated, or -1.
+        :returns: Updated :class:`~kazoo.protocol.states.ZnodeStat` of
+                  the node.
+
+        :raises:
+            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
+            match.
+
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the provided
+            value is too large.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        """
+        return self.set_async(path, value, version).get()
+
+    def set_async(self, path, value, version=-1):
+        """Set the value of a node. Takes the same arguments as
+        :meth:`set`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if value is not None and not isinstance(value, bytes):
+            raise TypeError("value must be a byte string")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+
+        async_result = self.handler.async_result()
+        self._call(SetData(_prefix_root(self.chroot, path), value, version),
+                   async_result)
+        return async_result
+
+    def transaction(self):
+        """Create and return a :class:`TransactionRequest` object
+
+        Creates a :class:`TransactionRequest` object. A Transaction can
+        consist of multiple operations which can be committed as a
+        single atomic unit. Either all of the operations will succeed
+        or none of them.
+
+        :returns: A TransactionRequest.
+        :rtype: :class:`TransactionRequest`
+
+        .. versionadded:: 0.6
+            Requires Zookeeper 3.4+
+
+        """
+        return TransactionRequest(self)
+
+    def delete(self, path, version=-1, recursive=False):
+        """Delete a node.
+
+        The call will succeed if such a node exists, and the given
+        version matches the node's version (if the given version is -1,
+        the default, it matches any node's versions).
+
+        This operation, if successful, will trigger all the watches on
+        the node of the given path left by `exists` API calls, and the
+        watches on the parent node left by `get_children` API calls.
+
+        :param path: Path of node to delete.
+        :param version: Version of node to delete, or -1 for any.
+        :param recursive: Recursively delete node and all its children,
+                          defaults to False.
+        :type recursive: bool
+
+        :raises:
+            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
+            match.
+
+            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
+            exist.
+
+            :exc:`~kazoo.exceptions.NotEmptyError` if the node has
+            children.
+
+            :exc:`~kazoo.exceptions.ZookeeperError` if the server
+            returns a non-zero error code.
+
+        """
+        if not isinstance(recursive, bool):
+            raise TypeError("recursive must be a bool")
+
+        if recursive:
+            return self._delete_recursive(path)
+        else:
+            return self.delete_async(path, version).get()
+
+    def delete_async(self, path, version=-1):
+        """Asynchronously delete a node. Takes the same arguments as
+        :meth:`delete`, with the exception of `recursive`.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+        async_result = self.handler.async_result()
+        self._call(Delete(_prefix_root(self.chroot, path), version),
+                   async_result)
+        return async_result
+
+    def _delete_recursive(self, path):
+        try:
+            children = self.get_children(path)
+        except NoNodeError:
+            return True
+
+        if children:
+            for child in children:
+                if path == "/":
+                    child_path = path + child
+                else:
+                    child_path = path + "/" + child
+
+                self._delete_recursive(child_path)
+        try:
+            self.delete(path)
+        except NoNodeError:  # pragma: nocover
+            pass
+
+
+class TransactionRequest(object):
+    """A Zookeeper Transaction Request
+
+    A Transaction provides a builder object that can be used to
+    construct and commit an atomic set of operations. The transaction
+    must be committed before its sent.
+
+    Transactions are not thread-safe and should not be accessed from
+    multiple threads at once.
+
+    .. versionadded:: 0.6
+        Requires Zookeeper 3.4+
+
+    """
+    def __init__(self, client):
+        self.client = client
+        self.operations = []
+        self.committed = False
+
+    def create(self, path, value=b"", acl=None, ephemeral=False,
+               sequence=False):
+        """Add a create ZNode to the transaction. Takes the same
+        arguments as :meth:`KazooClient.create`, with the exception
+        of `makepath`.
+
+        :returns: None
+
+        """
+        if acl is None and self.client.default_acl:
+            acl = self.client.default_acl
+
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if acl and not isinstance(acl, (tuple, list)):
+            raise TypeError("acl must be a tuple/list of ACL's")
+        if not isinstance(value, bytes):
+            raise TypeError("value must be a byte string")
+        if not isinstance(ephemeral, bool):
+            raise TypeError("ephemeral must be a bool")
+        if not isinstance(sequence, bool):
+            raise TypeError("sequence must be a bool")
+
+        flags = 0
+        if ephemeral:
+            flags |= 1
+        if sequence:
+            flags |= 2
+        if acl is None:
+            acl = OPEN_ACL_UNSAFE
+
+        self._add(Create(_prefix_root(self.client.chroot, path), value, acl,
+                         flags), None)
+
+    def delete(self, path, version=-1):
+        """Add a delete ZNode to the transaction. Takes the same
+        arguments as :meth:`KazooClient.delete`, with the exception of
+        `recursive`.
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+        self._add(Delete(_prefix_root(self.client.chroot, path), version))
+
+    def set_data(self, path, value, version=-1):
+        """Add a set ZNode value to the transaction. Takes the same
+        arguments as :meth:`KazooClient.set`.
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if not isinstance(value, bytes):
+            raise TypeError("value must be a byte string")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+        self._add(SetData(_prefix_root(self.client.chroot, path), value,
+                  version))
+
+    def check(self, path, version):
+        """Add a Check Version to the transaction.
+
+        This command will fail and abort a transaction if the path
+        does not match the specified version.
+
+        """
+        if not isinstance(path, basestring):
+            raise TypeError("path must be a string")
+        if not isinstance(version, int):
+            raise TypeError("version must be an int")
+        self._add(CheckVersion(_prefix_root(self.client.chroot, path),
+                  version))
+
+    def commit_async(self):
+        """Commit the transaction asynchronously.
+
+        :rtype: :class:`~kazoo.interfaces.IAsyncResult`
+
+        """
+        self._check_tx_state()
+        self.committed = True
+        async_object = self.client.handler.async_result()
+        self.client._call(Transaction(self.operations), async_object)
+        return async_object
+
+    def commit(self):
+        """Commit the transaction.
+
+        :returns: A list of the results for each operation in the
+                  transaction.
+
+        """
+        return self.commit_async().get()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        """Commit and cleanup accumulated transaction data."""
+        if not exc_type:
+            self.commit()
+
+    def _check_tx_state(self):
+        if self.committed:
+            raise ValueError('Transaction already committed')
+
+    def _add(self, request, post_processor=None):
+        self._check_tx_state()
+        self.client.logger.log(BLATHER, 'Added %r to %r', request, self)
+        self.operations.append(request)
diff --git a/slider-agent/src/main/python/kazoo/exceptions.py b/slider-agent/src/main/python/kazoo/exceptions.py
new file mode 100644
index 0000000..9c9e71d
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/exceptions.py
@@ -0,0 +1,200 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Exceptions"""
+from collections import defaultdict
+
+
+class KazooException(Exception):
+    """Base Kazoo exception that all other kazoo library exceptions
+    inherit from"""
+
+
+class ZookeeperError(KazooException):
+    """Base Zookeeper exception for errors originating from the
+    Zookeeper server"""
+
+
+class CancelledError(KazooException):
+    """Raised when a process is cancelled by another thread"""
+
+
+class ConfigurationError(KazooException):
+    """Raised if the configuration arguments to an object are
+    invalid"""
+
+
+class ZookeeperStoppedError(KazooException):
+    """Raised when the kazoo client stopped (and thus not connected)"""
+
+
+class ConnectionDropped(KazooException):
+    """Internal error for jumping out of loops"""
+
+
+class LockTimeout(KazooException):
+    """Raised if failed to acquire a lock.
+
+    .. versionadded:: 1.1
+    """
+
+
+class WriterNotClosedException(KazooException):
+    """Raised if the writer is unable to stop closing when requested.
+
+    .. versionadded:: 1.2
+    """
+
+
+def _invalid_error_code():
+    raise RuntimeError('Invalid error code')
+
+
+EXCEPTIONS = defaultdict(_invalid_error_code)
+
+
+def _zookeeper_exception(code):
+    def decorator(klass):
+        def create(*args, **kwargs):
+            return klass(args, kwargs)
+
+        EXCEPTIONS[code] = create
+        klass.code = code
+        return klass
+
+    return decorator
+
+
+@_zookeeper_exception(0)
+class RolledBackError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-1)
+class SystemZookeeperError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-2)
+class RuntimeInconsistency(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-3)
+class DataInconsistency(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-4)
+class ConnectionLoss(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-5)
+class MarshallingError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-6)
+class UnimplementedError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-7)
+class OperationTimeoutError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-8)
+class BadArgumentsError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-100)
+class APIError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-101)
+class NoNodeError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-102)
+class NoAuthError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-103)
+class BadVersionError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-108)
+class NoChildrenForEphemeralsError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-110)
+class NodeExistsError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-111)
+class NotEmptyError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-112)
+class SessionExpiredError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-113)
+class InvalidCallbackError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-114)
+class InvalidACLError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-115)
+class AuthFailedError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-118)
+class SessionMovedError(ZookeeperError):
+    pass
+
+
+@_zookeeper_exception(-119)
+class NotReadOnlyCallError(ZookeeperError):
+    """An API call that is not read-only was used while connected to
+    a read-only server"""
+
+
+class ConnectionClosedError(SessionExpiredError):
+    """Connection is closed"""
+
+
+# BW Compat aliases for C lib style exceptions
+ConnectionLossException = ConnectionLoss
+MarshallingErrorException = MarshallingError
+SystemErrorException = SystemZookeeperError
+RuntimeInconsistencyException = RuntimeInconsistency
+DataInconsistencyException = DataInconsistency
+UnimplementedException = UnimplementedError
+OperationTimeoutException = OperationTimeoutError
+BadArgumentsException = BadArgumentsError
+ApiErrorException = APIError
+NoNodeException = NoNodeError
+NoAuthException = NoAuthError
+BadVersionException = BadVersionError
+NoChildrenForEphemeralsException = NoChildrenForEphemeralsError
+NodeExistsException = NodeExistsError
+InvalidACLException = InvalidACLError
+AuthFailedException = AuthFailedError
+NotEmptyException = NotEmptyError
+SessionExpiredException = SessionExpiredError
+InvalidCallbackException = InvalidCallbackError
diff --git a/slider-agent/src/main/python/kazoo/handlers/__init__.py b/slider-agent/src/main/python/kazoo/handlers/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/handlers/gevent.py b/slider-agent/src/main/python/kazoo/handlers/gevent.py
new file mode 100644
index 0000000..060320c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/gevent.py
@@ -0,0 +1,165 @@
+"""
+license: Apache License 2.0, see LICENSE for more details.
+A gevent based handler.
+"""
+
+from __future__ import absolute_import
+
+import atexit
+import logging
+
+import gevent
+import gevent.event
+import gevent.queue
+import gevent.select
+import gevent.thread
+
+from gevent.queue import Empty
+from gevent.queue import Queue
+from gevent import socket
+try:
+    from gevent.lock import Semaphore, RLock
+except ImportError:
+    from gevent.coros import Semaphore, RLock
+
+from kazoo.handlers.utils import create_tcp_socket, create_tcp_connection
+
+_using_libevent = gevent.__version__.startswith('0.')
+
+log = logging.getLogger(__name__)
+
+_STOP = object()
+
+AsyncResult = gevent.event.AsyncResult
+
+
+class SequentialGeventHandler(object):
+    """Gevent handler for sequentially executing callbacks.
+
+    This handler executes callbacks in a sequential manner. A queue is
+    created for each of the callback events, so that each type of event
+    has its callback type run sequentially.
+
+    Each queue type has a greenlet worker that pulls the callback event
+    off the queue and runs it in the order the client sees it.
+
+    This split helps ensure that watch callbacks won't block session
+    re-establishment should the connection be lost during a Zookeeper
+    client call.
+
+    Watch callbacks should avoid blocking behavior as the next callback
+    of that type won't be run until it completes. If you need to block,
+    spawn a new greenlet and return immediately so callbacks can
+    proceed.
+
+    """
+    name = "sequential_gevent_handler"
+    sleep_func = staticmethod(gevent.sleep)
+
+    def __init__(self):
+        """Create a :class:`SequentialGeventHandler` instance"""
+        self.callback_queue = Queue()
+        self._running = False
+        self._async = None
+        self._state_change = Semaphore()
+        self._workers = []
+
+    class timeout_exception(gevent.event.Timeout):
+        def __init__(self, msg):
+            gevent.event.Timeout.__init__(self, exception=msg)
+
+    def _create_greenlet_worker(self, queue):
+        def greenlet_worker():
+            while True:
+                try:
+                    func = queue.get()
+                    if func is _STOP:
+                        break
+                    func()
+                except Empty:
+                    continue
+                except Exception as exc:
+                    log.warning("Exception in worker greenlet")
+                    log.exception(exc)
+        return gevent.spawn(greenlet_worker)
+
+    def start(self):
+        """Start the greenlet workers."""
+        with self._state_change:
+            if self._running:
+                return
+
+            self._running = True
+
+            # Spawn our worker greenlets, we have
+            # - A callback worker for watch events to be called
+            for queue in (self.callback_queue,):
+                w = self._create_greenlet_worker(queue)
+                self._workers.append(w)
+            atexit.register(self.stop)
+
+    def stop(self):
+        """Stop the greenlet workers and empty all queues."""
+        with self._state_change:
+            if not self._running:
+                return
+
+            self._running = False
+
+            for queue in (self.callback_queue,):
+                queue.put(_STOP)
+
+            while self._workers:
+                worker = self._workers.pop()
+                worker.join()
+
+            # Clear the queues
+            self.callback_queue = Queue()  # pragma: nocover
+
+            if hasattr(atexit, "unregister"):
+                atexit.unregister(self.stop)
+
+    def select(self, *args, **kwargs):
+        return gevent.select.select(*args, **kwargs)
+
+    def socket(self, *args, **kwargs):
+        return create_tcp_socket(socket)
+
+    def create_connection(self, *args, **kwargs):
+        return create_tcp_connection(socket, *args, **kwargs)
+
+    def event_object(self):
+        """Create an appropriate Event object"""
+        return gevent.event.Event()
+
+    def lock_object(self):
+        """Create an appropriate Lock object"""
+        return gevent.thread.allocate_lock()
+
+    def rlock_object(self):
+        """Create an appropriate RLock object"""
+        return RLock()
+
+    def async_result(self):
+        """Create a :class:`AsyncResult` instance
+
+        The :class:`AsyncResult` instance will have its completion
+        callbacks executed in the thread the
+        :class:`SequentialGeventHandler` is created in (which should be
+        the gevent/main thread).
+
+        """
+        return AsyncResult()
+
+    def spawn(self, func, *args, **kwargs):
+        """Spawn a function to run asynchronously"""
+        return gevent.spawn(func, *args, **kwargs)
+
+    def dispatch_callback(self, callback):
+        """Dispatch to the callback object
+
+        The callback is put on separate queues to run depending on the
+        type as documented for the :class:`SequentialGeventHandler`.
+
+        """
+        self.callback_queue.put(lambda: callback.func(*callback.args))
diff --git a/slider-agent/src/main/python/kazoo/handlers/threading.py b/slider-agent/src/main/python/kazoo/handlers/threading.py
new file mode 100644
index 0000000..3ca9a8f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/threading.py
@@ -0,0 +1,289 @@
+"""
+license: Apache License 2.0, see LICENSE for more details.
+A threading based handler.
+
+The :class:`SequentialThreadingHandler` is intended for regular Python
+environments that use threads.
+
+.. warning::
+
+    Do not use :class:`SequentialThreadingHandler` with applications
+    using asynchronous event loops (like gevent). Use the
+    :class:`~kazoo.handlers.gevent.SequentialGeventHandler` instead.
+
+"""
+from __future__ import absolute_import
+
+import atexit
+import logging
+import select
+import socket
+import threading
+import time
+
+try:
+    import Queue
+except ImportError:  # pragma: nocover
+    import queue as Queue
+
+from kazoo.handlers.utils import create_tcp_socket, create_tcp_connection
+
+# sentinel objects
+_NONE = object()
+_STOP = object()
+
+log = logging.getLogger(__name__)
+
+
class TimeoutError(Exception):
    """Raised when a blocking operation exceeds its timeout; exposed as the
    handler's ``timeout_exception``."""
    pass
+
+
class AsyncResult(object):
    """A one-time event that stores a value or an exception.

    Waiters block on an internal Condition; completion callbacks
    registered via :meth:`rawlink` are queued on the handler's
    completion queue rather than run inline on the setting thread.
    """
    def __init__(self, handler):
        self._handler = handler
        self.value = None
        # _NONE means "not set yet"; None means "set() completed successfully"
        self._exception = _NONE
        self._condition = threading.Condition()
        self._callbacks = []

    def ready(self):
        """Return true if and only if it holds a value or an
        exception"""
        return self._exception is not _NONE

    def successful(self):
        """Return true if and only if it is ready and holds a value"""
        return self._exception is None

    @property
    def exception(self):
        # Stored exception, or None while unset / after a successful set()
        if self._exception is not _NONE:
            return self._exception

    def set(self, value=None):
        """Store the value. Wake up the waiters."""
        with self._condition:
            self.value = value
            self._exception = None

            for callback in self._callbacks:
                # Bind the current callback through a default argument.
                # The original `lambda: callback(self)` late-binds
                # `callback`, so every queued entry would invoke the
                # *last* registered callback once the queue drains.
                self._handler.completion_queue.put(
                    lambda cb=callback: cb(self)
                )
            self._condition.notify_all()

    def set_exception(self, exception):
        """Store the exception. Wake up the waiters."""
        with self._condition:
            self._exception = exception

            for callback in self._callbacks:
                # Same early-binding fix as in set().
                self._handler.completion_queue.put(
                    lambda cb=callback: cb(self)
                )
            self._condition.notify_all()

    def get(self, block=True, timeout=None):
        """Return the stored value or raise the exception.

        If there is no value raises TimeoutError.

        """
        with self._condition:
            if self._exception is not _NONE:
                if self._exception is None:
                    return self.value
                raise self._exception
            elif block:
                self._condition.wait(timeout)
                if self._exception is not _NONE:
                    if self._exception is None:
                        return self.value
                    raise self._exception

            # if we get to this point we timed out
            raise TimeoutError()

    def get_nowait(self):
        """Return the value or raise the exception without blocking.

        If nothing is available, raises TimeoutError

        """
        return self.get(block=False)

    def wait(self, timeout=None):
        """Block until the instance is ready.

        :returns: True if a value or exception was set before the
                  timeout elapsed.
        """
        with self._condition:
            self._condition.wait(timeout)
        return self._exception is not _NONE

    def rawlink(self, callback):
        """Register a callback to call when a value or an exception is
        set"""
        with self._condition:
            # Are we already set? Dispatch it now
            if self.ready():
                self._handler.completion_queue.put(
                    lambda: callback(self)
                )
                return

            if callback not in self._callbacks:
                self._callbacks.append(callback)

    def unlink(self, callback):
        """Remove the callback set by :meth:`rawlink`"""
        with self._condition:
            if self.ready():
                # Already triggered, ignore
                return

            if callback in self._callbacks:
                self._callbacks.remove(callback)
+
+
class SequentialThreadingHandler(object):
    """Threading handler for sequentially executing callbacks.

    This handler executes callbacks in a sequential manner. A queue is
    created for each of the callback events, so that each type of event
    has its callback type run sequentially. These are split into two
    queues, one for watch events and one for async result completion
    callbacks.

    Each queue type has a thread worker that pulls the callback event
    off the queue and runs it in the order the client sees it.

    This split helps ensure that watch callbacks won't block session
    re-establishment should the connection be lost during a Zookeeper
    client call.

    Watch and completion callbacks should avoid blocking behavior as
    the next callback of that type won't be run until it completes. If
    you need to block, spawn a new thread and return immediately so
    callbacks can proceed.

    .. note::

        Completion callbacks can block to wait on Zookeeper calls, but
        no other completion callbacks will execute until the callback
        returns.

    """
    name = "sequential_threading_handler"
    timeout_exception = TimeoutError
    sleep_func = staticmethod(time.sleep)
    queue_impl = Queue.Queue
    queue_empty = Queue.Empty

    def __init__(self):
        """Create a :class:`SequentialThreadingHandler` instance"""
        self.callback_queue = self.queue_impl()
        self.completion_queue = self.queue_impl()
        self._running = False
        self._state_change = threading.Lock()
        self._workers = []

    def _create_thread_worker(self, queue):
        """Start a daemon thread that drains `queue` until _STOP is seen."""
        def thread_worker():  # pragma: nocover
            while True:
                try:
                    func = queue.get()
                    try:
                        if func is _STOP:
                            break
                        func()
                    except Exception:
                        log.exception("Exception in worker queue thread")
                    finally:
                        queue.task_done()
                except self.queue_empty:
                    continue
        t = threading.Thread(target=thread_worker)

        # Even though these should be joined, it's possible stop might
        # not issue in time so we set them to daemon to let the program
        # exit anyways
        t.daemon = True
        t.start()
        return t

    def start(self):
        """Start the worker threads (idempotent)."""
        with self._state_change:
            if self._running:
                return

            # Spawn our worker threads, we have
            # - A callback worker for watch events to be called
            # - A completion worker for completion events to be called
            for queue in (self.completion_queue, self.callback_queue):
                w = self._create_thread_worker(queue)
                self._workers.append(w)
            self._running = True
            atexit.register(self.stop)

    def stop(self):
        """Stop the worker threads and empty all queues (idempotent)."""
        with self._state_change:
            if not self._running:
                return

            self._running = False

            # Sentinel wakes each worker so join() below can complete
            for queue in (self.completion_queue, self.callback_queue):
                queue.put(_STOP)

            self._workers.reverse()
            while self._workers:
                worker = self._workers.pop()
                worker.join()

            # Clear the queues
            self.callback_queue = self.queue_impl()
            self.completion_queue = self.queue_impl()
            # Python 2's atexit has no unregister(); guard for portability
            if hasattr(atexit, "unregister"):
                atexit.unregister(self.stop)

    def select(self, *args, **kwargs):
        """Thin wrapper around select.select."""
        return select.select(*args, **kwargs)

    def socket(self, *args, **kwargs):
        """Create a TCP socket with default options set.

        Extra arguments are accepted (and ignored) for interface
        compatibility with the gevent handler's socket(); previously the
        two handlers had inconsistent signatures.
        """
        return create_tcp_socket(socket)

    def create_connection(self, *args, **kwargs):
        """Create a connected TCP socket with default options applied."""
        return create_tcp_connection(socket, *args, **kwargs)

    def event_object(self):
        """Create an appropriate Event object"""
        return threading.Event()

    def lock_object(self):
        """Create a lock object"""
        return threading.Lock()

    def rlock_object(self):
        """Create an appropriate RLock object"""
        return threading.RLock()

    def async_result(self):
        """Create a :class:`AsyncResult` instance"""
        return AsyncResult(self)

    def spawn(self, func, *args, **kwargs):
        """Run func asynchronously in a daemon thread; returns the Thread."""
        t = threading.Thread(target=func, args=args, kwargs=kwargs)
        t.daemon = True
        t.start()
        return t

    def dispatch_callback(self, callback):
        """Dispatch to the callback object

        The callback is put on separate queues to run depending on the
        type as documented for the :class:`SequentialThreadingHandler`.

        """
        self.callback_queue.put(lambda: callback.func(*callback.args))
diff --git a/slider-agent/src/main/python/kazoo/handlers/utils.py b/slider-agent/src/main/python/kazoo/handlers/utils.py
new file mode 100644
index 0000000..60d6404
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/handlers/utils.py
@@ -0,0 +1,94 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo handler helpers"""
+
+HAS_FNCTL = True
+try:
+    import fcntl
+except ImportError:  # pragma: nocover
+    HAS_FNCTL = False
+import functools
+import os
+
+
+def _set_fd_cloexec(fd):
+    flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+    fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)
+
+
def _set_default_tcpsock_options(module, sock):
    """Disable Nagle's algorithm and (where fcntl exists) set
    close-on-exec on sock; returns the same socket."""
    sock.setsockopt(module.IPPROTO_TCP, module.TCP_NODELAY, 1)
    if HAS_FNCTL:
        _set_fd_cloexec(sock)
    return sock
+
+
def create_pipe():
    """Create a non-blocking read/write pipe.

    Where fcntl is available, both ends are made non-blocking and
    close-on-exec.

    :returns: (read_fd, write_fd)
    """
    read_fd, write_fd = os.pipe()
    if HAS_FNCTL:
        for fd in (read_fd, write_fd):
            fcntl.fcntl(fd, fcntl.F_SETFL, os.O_NONBLOCK)
            _set_fd_cloexec(fd)
    return read_fd, write_fd
+
+
def create_tcp_socket(module):
    """Create an AF_INET stream socket with the CLOEXEC flag set.

    :param module: a socket-compatible module supplying the constants
                   and socket() constructor.
    """
    sock_type = module.SOCK_STREAM
    if hasattr(module, 'SOCK_CLOEXEC'):  # pragma: nocover
        # if available, set cloexec flag during socket creation
        sock_type |= module.SOCK_CLOEXEC
    sock = module.socket(module.AF_INET, sock_type)
    return _set_default_tcpsock_options(module, sock)
+
+
def create_tcp_connection(module, address, timeout=None):
    """Connect a TCP socket to address and apply default options.

    :param timeout: seconds, or None for the module's global default.
    """
    # create_connection() exposes no public "use default" sentinel, so
    # fall back to the module-private one when no timeout was given.
    effective_timeout = (module._GLOBAL_DEFAULT_TIMEOUT
                         if timeout is None else timeout)
    sock = module.create_connection(address, effective_timeout)
    _set_default_tcpsock_options(module, sock)
    return sock
+
+
def capture_exceptions(async_result):
    """Return a decorator that routes exceptions from the wrapped
    function into ``async_result.set_exception`` instead of letting
    them propagate.

    :param async_result: An async result implementing :class:`IAsyncResult`

    """
    def capture(function):
        @functools.wraps(function)
        def captured_function(*args, **kwargs):
            try:
                result = function(*args, **kwargs)
            except Exception as exc:
                async_result.set_exception(exc)
            else:
                return result
        return captured_function
    return capture
+
+
def wrap(async_result):
    """Return a new decorated function that propagates the return value or
    exception of wrapped function to an async_result.  NOTE: Only propagates a
    non-None return value.

    :param async_result: An async result implementing :class:`IAsyncResult`

    """
    def decorate(function):
        @capture_exceptions(async_result)
        def wrapper(*args, **kwargs):
            outcome = function(*args, **kwargs)
            if outcome is not None:
                async_result.set(outcome)
            return outcome
        return wrapper
    return decorate
diff --git a/slider-agent/src/main/python/kazoo/hosts.py b/slider-agent/src/main/python/kazoo/hosts.py
new file mode 100644
index 0000000..ca0dd35
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/hosts.py
@@ -0,0 +1,27 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import random
+
+try:
+    from urlparse import urlsplit
+except ImportError:
+    # try python3 then
+    from urllib.parse import urlsplit
+
def collect_hosts(hosts, randomize=True):
    """Collect a set of hosts and an optional chroot from a string.

    :param hosts: comma-separated ``host[:port]`` list, optionally
                  followed by ``/chroot``.
    :returns: ([(host, port), ...], chroot or None); port defaults
              to 2181.
    """
    host_ports, chroot = hosts.partition("/")[::2]
    chroot = "/" + chroot if chroot else None

    def parse(entry):
        # urlsplit deals with IPv4 & IPv6 address:port complexity for us
        parsed = urlsplit("xxx://" + entry)
        port = int(parsed.port) if parsed.port else 2181
        return parsed.hostname.strip(), port

    result = [parse(entry) for entry in host_ports.split(",")]

    if randomize:
        random.shuffle(result)

    return result, chroot
diff --git a/slider-agent/src/main/python/kazoo/interfaces.py b/slider-agent/src/main/python/kazoo/interfaces.py
new file mode 100644
index 0000000..7aff561
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/interfaces.py
@@ -0,0 +1,204 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Interfaces
+
+.. versionchanged:: 1.4
+
+    The classes in this module used to be interface declarations based on
+    `zope.interface.Interface`. They were converted to normal classes and
+    now serve as documentation only.
+
+"""
+
+# public API
+
+
class IHandler(object):
    """A Callback Handler for Zookeeper completion and watch callbacks.

    This object must implement several methods responsible for
    determining how completion / watch callbacks are handled as well as
    the method for calling :class:`IAsyncResult` callback functions.

    These functions are used to abstract differences between a Python
    threading environment and asynchronous single-threaded environments
    like gevent. The minimum functionality needed for Kazoo to handle
    these differences is encompassed in this interface.

    The Handler should document how callbacks are called for:

    * Zookeeper completion events
    * Zookeeper watch events

    .. attribute:: name

        Human readable name of the Handler interface.

    .. attribute:: timeout_exception

        Exception class that should be thrown and captured if a
        result is not available within the given time.

    .. attribute:: sleep_func

        Appropriate sleep function that can be called with a single
        argument and sleep.

    """
    # NOTE: the methods below are documentation-only stubs (they return
    # None); concrete handlers provide the real implementations.

    def start(self):
        """Start the handler, used for setting up the handler."""

    def stop(self):
        """Stop the handler. Should block until the handler is safely
        stopped."""

    def select(self):
        """A select method that implements Python's select.select
        API"""

    def socket(self):
        """A socket method that implements Python's socket.socket
        API"""

    def create_connection(self):
        """A socket method that implements Python's
        socket.create_connection API"""

    def event_object(self):
        """Return an appropriate object that implements Python's
        threading.Event API"""

    def lock_object(self):
        """Return an appropriate object that implements Python's
        threading.Lock API"""

    def rlock_object(self):
        """Return an appropriate object that implements Python's
        threading.RLock API"""

    def async_result(self):
        """Return an instance that conforms to the
        :class:`~IAsyncResult` interface appropriate for this
        handler"""

    def spawn(self, func, *args, **kwargs):
        """Spawn a function to run asynchronously

        :param args: args to call the function with.
        :param kwargs: keyword args to call the function with.

        This method should return immediately and execute the function
        with the provided args and kwargs in an asynchronous manner.

        """

    def dispatch_callback(self, callback):
        """Dispatch to the callback object

        :param callback: A :class:`~kazoo.protocol.states.Callback`
                         object to be called.

        """
+
+
class IAsyncResult(object):
    """An Async Result object that can be queried for a value that has
    been set asynchronously.

    This object is modeled on the ``gevent`` AsyncResult object.

    The implementation must account for the fact that the :meth:`set`
    and :meth:`set_exception` methods will be called from within the
    Zookeeper thread which may require extra care under asynchronous
    environments.

    .. attribute:: value

        Holds the value passed to :meth:`set` if :meth:`set` was
        called. Otherwise `None`.

    .. attribute:: exception

        Holds the exception instance passed to :meth:`set_exception`
        if :meth:`set_exception` was called. Otherwise `None`.

    """
    # NOTE: the methods below are documentation-only stubs (they return
    # None); concrete async results provide the real implementations.

    def ready(self):
        """Return `True` if and only if it holds a value or an
        exception"""

    def successful(self):
        """Return `True` if and only if it is ready and holds a
        value"""

    def set(self, value=None):
        """Store the value. Wake up the waiters.

        :param value: Value to store as the result.

        Any waiters blocking on :meth:`get` or :meth:`wait` are woken
        up. Sequential calls to :meth:`wait` and :meth:`get` will not
        block at all."""

    def set_exception(self, exception):
        """Store the exception. Wake up the waiters.

        :param exception: Exception to raise when fetching the value.

        Any waiters blocking on :meth:`get` or :meth:`wait` are woken
        up. Sequential calls to :meth:`wait` and :meth:`get` will not
        block at all."""

    def get(self, block=True, timeout=None):
        """Return the stored value or raise the exception

        :param block: Whether this method should block or return
                      immediately.
        :type block: bool
        :param timeout: How long to wait for a value when `block` is
                        `True`.
        :type timeout: float

        If this instance already holds a value / an exception, return /
        raise it immediately. Otherwise, block until :meth:`set` or
        :meth:`set_exception` has been called or until the optional
        timeout occurs."""

    def get_nowait(self):
        """Return the value or raise the exception without blocking.

        If nothing is available, raise the Timeout exception class on
        the associated :class:`IHandler` interface."""

    def wait(self, timeout=None):
        """Block until the instance is ready.

        :param timeout: How long to wait for a value when `block` is
                        `True`.
        :type timeout: float

        If this instance already holds a value / an exception, return /
        raise it immediately. Otherwise, block until :meth:`set` or
        :meth:`set_exception` has been called or until the optional
        timeout occurs."""

    def rawlink(self, callback):
        """Register a callback to call when a value or an exception is
        set

        :param callback:
            A callback function to call after :meth:`set` or
            :meth:`set_exception` has been called. This function will
            be passed a single argument, this instance.
        :type callback: func

        """

    def unlink(self, callback):
        """Remove the callback set by :meth:`rawlink`

        :param callback: A callback function to remove.
        :type callback: func

        """
diff --git a/slider-agent/src/main/python/kazoo/loggingsupport.py b/slider-agent/src/main/python/kazoo/loggingsupport.py
new file mode 100644
index 0000000..91ae632
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/loggingsupport.py
@@ -0,0 +1,3 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
BLATHER = 5  # custom log level below DEBUG (10), for low-level debugging
+
diff --git a/slider-agent/src/main/python/kazoo/protocol/__init__.py b/slider-agent/src/main/python/kazoo/protocol/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/protocol/connection.py b/slider-agent/src/main/python/kazoo/protocol/connection.py
new file mode 100644
index 0000000..3cbb87f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/connection.py
@@ -0,0 +1,624 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Protocol Connection Handler"""
+import logging
+import os
+import random
+import select
+import socket
+import sys
+import time
+from binascii import hexlify
+from contextlib import contextmanager
+
+from kazoo.exceptions import (
+    AuthFailedError,
+    ConnectionDropped,
+    EXCEPTIONS,
+    SessionExpiredError,
+    NoNodeError
+)
+from kazoo.handlers.utils import create_pipe
+from kazoo.loggingsupport import BLATHER
+from kazoo.protocol.serialization import (
+    Auth,
+    Close,
+    Connect,
+    Exists,
+    GetChildren,
+    Ping,
+    PingInstance,
+    ReplyHeader,
+    Transaction,
+    Watch,
+    int_struct
+)
+from kazoo.protocol.states import (
+    Callback,
+    KeeperState,
+    WatchedEvent,
+    EVENT_TYPE_MAP,
+)
+from kazoo.retry import (
+    ForceRetryError,
+    RetryFailedError
+)
+
+log = logging.getLogger(__name__)
+
+
+# Special testing hook objects used to force a session expired error as
+# if it came from the server
+_SESSION_EXPIRED = object()
+_CONNECTION_DROP = object()
+
+STOP_CONNECTING = object()
+
+CREATED_EVENT = 1
+DELETED_EVENT = 2
+CHANGED_EVENT = 3
+CHILD_EVENT = 4
+
+WATCH_XID = -1
+PING_XID = -2
+AUTH_XID = -4
+
+CLOSE_RESPONSE = Close.type
+
if sys.version_info > (3, ):  # pragma: nocover
    # Python 3 has no builtin buffer(); emulate it with memoryview slicing
    def buffer(obj, offset=0):
        return memoryview(obj)[offset:]

    # Python 3 spells iterator advancement as the next() builtin
    advance_iterator = next
else:  # pragma: nocover
    # Python 2: call the iterator's next() method directly
    def advance_iterator(it):
        return it.next()
+
+
class RWPinger(object):
    """A Read/Write Server Pinger Iterable

    This object is initialized with the hosts iterator object and the
    socket creation function. Anytime `next` is called on its iterator
    it yields either False, or a host, port tuple if it found a r/w
    capable Zookeeper node.

    After the first run-through of hosts, an exponential back-off delay
    is added before the next run. This delay is tracked internally and
    the iterator will yield False if called too soon.

    """
    def __init__(self, hosts, connection_func, socket_handling):
        self.hosts = hosts
        self.connection = connection_func
        self.last_attempt = None
        self.socket_handling = socket_handling
        # Current back-off delay; doubled after each full pass over hosts.
        self._delay = 0.5

    def __iter__(self):
        if not self.last_attempt:
            self.last_attempt = time.time()
        self._delay = 0.5
        while True:
            # Read the attribute at each yield so back-off growth from
            # _next_server takes effect (doubling a local parameter, as
            # the previous version did, never changed this loop's delay).
            yield self._next_server(self._delay)

    def _next_server(self, delay):
        jitter = random.randint(0, 100) / 100.0
        while time.time() < self.last_attempt + delay + jitter:
            # Skip rw ping checks if its too soon
            return False
        for host, port in self.hosts:
            log.debug("Pinging server for r/w: %s:%s", host, port)
            self.last_attempt = time.time()
            try:
                with self.socket_handling():
                    sock = self.connection((host, port))
                    sock.sendall(b"isro")
                    result = sock.recv(8192)
                    sock.close()
                    if result == b'rw':
                        return (host, port)
                    else:
                        return False
            except ConnectionDropped:
                return False

            # Add some jitter between host pings
            while time.time() < self.last_attempt + jitter:
                return False
        # Completed a full pass without finding a r/w server: grow the
        # back-off for the next pass.
        self._delay *= 2
        return False
+
+
class RWServerAvailable(Exception):
    """Raised when a read/write capable server becomes available"""
+
+
+class ConnectionHandler(object):
+    """Zookeeper connection handler"""
    def __init__(self, client, retry_sleeper, logger=None):
        """Bind the handler to a client and initialize connection state.

        :param client: the owning Zookeeper client; supplies the handler
                       used for events/spawning.
        :param retry_sleeper: retry helper used by the connection loop.
        :param logger: optional logger; defaults to the module logger.
        """
        self.client = client
        self.handler = client.handler
        self.retry_sleeper = retry_sleeper
        self.logger = logger or log

        # Our event objects
        self.connection_closed = client.handler.event_object()
        self.connection_closed.set()
        self.connection_stopped = client.handler.event_object()
        self.connection_stopped.set()
        self.ping_outstanding = client.handler.event_object()

        # Pipe fds created in start(); presumably used to signal the
        # connection loop from other threads — confirm in zk_loop.
        self._read_pipe = None
        self._write_pipe = None

        self._socket = None
        self._xid = None
        self._rw_server = None
        self._ro_mode = False

        self._connection_routine = None
+
+    # This is instance specific to avoid odd thread bug issues in Python
+    # during shutdown global cleanup
    @contextmanager
    def _socket_error_handling(self):
        """Context manager translating socket/select errors into
        ConnectionDropped."""
        try:
            yield
        except (socket.error, select.error) as e:
            # Not every error object carries strerror; fall back to the
            # exception itself for the message.
            err = getattr(e, 'strerror', e)
            raise ConnectionDropped("socket connection error: %s" % (err,))
+
    def start(self):
        """Start the connection up"""
        if self.connection_closed.is_set():
            # Fresh start (or restart after close()): create a new
            # signalling pipe before clearing the closed flag.
            self._read_pipe, self._write_pipe = create_pipe()
            self.connection_closed.clear()
        if self._connection_routine:
            raise Exception("Unable to start, connection routine already "
                            "active.")
        self._connection_routine = self.handler.spawn(self.zk_loop)
+
    def stop(self, timeout=None):
        """Ensure the writer has stopped, wait to see if it does.

        :returns: True if the connection stopped within the timeout.
        """
        self.connection_stopped.wait(timeout)
        if self._connection_routine:
            self._connection_routine.join()
            self._connection_routine = None
        return self.connection_stopped.is_set()
+
    def close(self):
        """Release resources held by the connection

        The connection can be restarted afterwards.
        """
        if not self.connection_stopped.is_set():
            raise Exception("Cannot close connection until it is stopped")
        self.connection_closed.set()
        # NOTE(review): the pipe attrs are nulled before os.close —
        # presumably so concurrent readers never observe a closed fd;
        # confirm against zk_loop usage.
        wp, rp = self._write_pipe, self._read_pipe
        self._write_pipe = self._read_pipe = None
        if wp is not None:
            os.close(wp)
        if rp is not None:
            os.close(rp)
+
    def _server_pinger(self):
        """Returns a server pinger iterable, that will ping the next
        server in the list, and apply a back-off between attempts."""
        # The pinger yields False until a read/write server is found.
        return RWPinger(self.client.hosts, self.handler.create_connection,
                        self._socket_error_handling)
+
    def _read_header(self, timeout):
        """Read one length-prefixed reply and deserialize its header.

        :returns: (ReplyHeader, raw reply bytes, offset past the header)
        """
        b = self._read(4, timeout)
        length = int_struct.unpack(b)[0]
        b = self._read(length, timeout)
        header, offset = ReplyHeader.deserialize(b, 0)
        return header, b, offset
+
    def _read(self, length, timeout):
        """Read exactly `length` bytes from the socket.

        Waits for readability via handler.select; raises the handler's
        timeout exception on a select timeout and ConnectionDropped when
        the peer closes the socket.
        """
        msgparts = []
        remaining = length
        with self._socket_error_handling():
            while remaining > 0:
                s = self.handler.select([self._socket], [], [], timeout)[0]
                if not s:  # pragma: nocover
                    # If the read list is empty, we got a timeout. We don't
                    # have to check wlist and xlist as we don't set any
                    raise self.handler.timeout_exception("socket time-out")

                chunk = self._socket.recv(remaining)
                if chunk == b'':
                    raise ConnectionDropped('socket connection broken')
                msgparts.append(chunk)
                remaining -= len(chunk)
            return b"".join(msgparts)
+
    def _invoke(self, timeout, request, xid=None):
        """A special writer used during connection establishment
        only.

        Synchronously submits `request` and reads the reply. With an
        xid, the reply header is validated and the zxid returned; with
        no xid, the raw response is deserialized via the request's own
        `deserialize` (when present) and (obj, zxid) is returned.
        """
        self._submit(request, timeout, xid)
        zxid = None
        if xid:
            header, buffer, offset = self._read_header(timeout)
            if header.xid != xid:
                raise RuntimeError('xids do not match, expected %r received %r',
                                   xid, header.xid)
            if header.zxid > 0:
                zxid = header.zxid
            if header.err:
                callback_exception = EXCEPTIONS[header.err]()
                self.logger.debug(
                    'Received error(xid=%s) %r', xid, callback_exception)
                raise callback_exception
            return zxid

        # No xid: read a bare length-prefixed response body
        msg = self._read(4, timeout)
        length = int_struct.unpack(msg)[0]
        msg = self._read(length, timeout)

        if hasattr(request, 'deserialize'):
            try:
                obj, _ = request.deserialize(msg, 0)
            except Exception:
                self.logger.exception("Exception raised during deserialization"
                                      " of request: %s", request)

                # raise ConnectionDropped so connect loop will retry
                raise ConnectionDropped('invalid server response')
            self.logger.log(BLATHER, 'Read response %s', obj)
            return obj, zxid

        return zxid
+
    def _submit(self, request, timeout, xid=None):
        """Submit a request object with a timeout value and optional
        xid"""
        b = bytearray()
        if xid:
            b.extend(int_struct.pack(xid))
        if request.type:
            b.extend(int_struct.pack(request.type))
        b += request.serialize()
        # Pings log at BLATHER to keep normal debug output readable
        self.logger.log((BLATHER if isinstance(request, Ping) else logging.DEBUG),
                        "Sending request(xid=%s): %s", xid, request)
        # Wire frame: 4-byte length prefix (int_struct), then payload
        self._write(int_struct.pack(len(b)) + b, timeout)
+
    def _write(self, msg, timeout):
        """Write a raw msg to the socket"""
        sent = 0
        msg_length = len(msg)
        with self._socket_error_handling():
            # send() may transmit fewer bytes than requested; loop until
            # the whole message is out.
            while sent < msg_length:
                s = self.handler.select([], [self._socket], [], timeout)[1]
                if not s:  # pragma: nocover
                    # If the write list is empty, we got a timeout. We don't
                    # have to check rlist and xlist as we don't set any
                    raise self.handler.timeout_exception("socket time-out")
                msg_slice = buffer(msg, sent)
                bytes_sent = self._socket.send(msg_slice)
                if not bytes_sent:
                    raise ConnectionDropped('socket connection broken')
                sent += bytes_sent
+
    def _read_watch_event(self, buffer, offset):
        """Deserialize a watch event and dispatch any registered watchers."""
        client = self.client
        watch, offset = Watch.deserialize(buffer, offset)
        path = watch.path

        self.logger.debug('Received EVENT: %s', watch)

        watchers = []

        # pop() makes the watches one-shot: they are removed as they fire
        if watch.type in (CREATED_EVENT, CHANGED_EVENT):
            watchers.extend(client._data_watchers.pop(path, []))
        elif watch.type == DELETED_EVENT:
            # Deletion fires both data and child watches for the node
            watchers.extend(client._data_watchers.pop(path, []))
            watchers.extend(client._child_watchers.pop(path, []))
        elif watch.type == CHILD_EVENT:
            watchers.extend(client._child_watchers.pop(path, []))
        else:
            self.logger.warn('Received unknown event %r', watch.type)
            return

        # Strip the chroot if needed
        path = client.unchroot(path)
        ev = WatchedEvent(EVENT_TYPE_MAP[watch.type], client._state, path)

        # Last check to ignore watches if we've been stopped
        if client._stopped.is_set():
            return

        # Dump the watchers to the watch thread
        for watch in watchers:
            client.handler.dispatch_callback(Callback('watch', watch, (ev,)))
+
+    def _read_response(self, header, buffer, offset):
+        client = self.client
+        request, async_object, xid = client._pending.popleft()
+        if header.zxid and header.zxid > 0:
+            client.last_zxid = header.zxid
+        if header.xid != xid:
+            raise RuntimeError('xids do not match, expected %r '
+                               'received %r' % (xid, header.xid))
+
+        # Determine if its an exists request and a no node error
+        exists_error = (header.err == NoNodeError.code and
+                        request.type == Exists.type)
+
+        # Set the exception if its not an exists error
+        if header.err and not exists_error:
+            callback_exception = EXCEPTIONS[header.err]()
+            self.logger.debug(
+                'Received error(xid=%s) %r', xid, callback_exception)
+            if async_object:
+                async_object.set_exception(callback_exception)
+        elif request and async_object:
+            if exists_error:
+                # It's a NoNodeError, which is fine for an exists
+                # request
+                async_object.set(None)
+            else:
+                try:
+                    response = request.deserialize(buffer, offset)
+                except Exception as exc:
+                    self.logger.exception("Exception raised during deserialization"
+                                          " of request: %s", request)
+                    async_object.set_exception(exc)
+                    return
+                self.logger.debug(
+                    'Received response(xid=%s): %r', xid, response)
+
+                # We special case a Transaction as we have to unchroot things
+                if request.type == Transaction.type:
+                    response = Transaction.unchroot(client, response)
+
+                async_object.set(response)
+
+            # Determine if watchers should be registered
+            watcher = getattr(request, 'watcher', None)
+            if not client._stopped.is_set() and watcher:
+                if isinstance(request, GetChildren):
+                    client._child_watchers[request.path].add(watcher)
+                else:
+                    client._data_watchers[request.path].add(watcher)
+
+        if isinstance(request, Close):
+            self.logger.log(BLATHER, 'Read close response')
+            return CLOSE_RESPONSE
+
+    def _read_socket(self, read_timeout):
+        """Called when there's something to read on the socket"""
+        client = self.client
+
+        header, buffer, offset = self._read_header(read_timeout)
+        if header.xid == PING_XID:
+            self.logger.log(BLATHER, 'Received Ping')
+            self.ping_outstanding.clear()
+        elif header.xid == AUTH_XID:
+            self.logger.log(BLATHER, 'Received AUTH')
+
+            request, async_object, xid = client._pending.popleft()
+            if header.err:
+                async_object.set_exception(AuthFailedError())
+                client._session_callback(KeeperState.AUTH_FAILED)
+            else:
+                async_object.set(True)
+        elif header.xid == WATCH_XID:
+            self._read_watch_event(buffer, offset)
+        else:
+            self.logger.log(BLATHER, 'Reading for header %r', header)
+
+            return self._read_response(header, buffer, offset)
+
+    def _send_request(self, read_timeout, connect_timeout):
+        """Called when we have something to send out on the socket"""
+        client = self.client
+        try:
+            request, async_object = client._queue[0]
+        except IndexError:
+            # Not actually something on the queue, this can occur if
+            # something happens to cancel the request such that we
+            # don't clear the pipe below after sending
+            try:
+                # Clear possible inconsistence (no request in the queue
+                # but have data in the read pipe), which causes cpu to spin.
+                os.read(self._read_pipe, 1)
+            except OSError:
+                pass
+            return
+
+        # Special case for testing, if this is a _SessionExpire object
+        # then throw a SessionExpiration error as if we were dropped
+        if request is _SESSION_EXPIRED:
+            raise SessionExpiredError("Session expired: Testing")
+        if request is _CONNECTION_DROP:
+            raise ConnectionDropped("Connection dropped: Testing")
+
+        # Special case for auth packets
+        if request.type == Auth.type:
+            xid = AUTH_XID
+        else:
+            self._xid += 1
+            xid = self._xid
+
+        self._submit(request, connect_timeout, xid)
+        client._queue.popleft()
+        os.read(self._read_pipe, 1)
+        client._pending.append((request, async_object, xid))
+
+    def _send_ping(self, connect_timeout):
+        self.ping_outstanding.set()
+        self._submit(PingInstance, connect_timeout, PING_XID)
+
+        # Determine if we need to check for a r/w server
+        if self._ro_mode:
+            result = advance_iterator(self._ro_mode)
+            if result:
+                self._rw_server = result
+                raise RWServerAvailable()
+
+    def zk_loop(self):
+        """Main Zookeeper handling loop"""
+        self.logger.log(BLATHER, 'ZK loop started')
+
+        self.connection_stopped.clear()
+
+        retry = self.retry_sleeper.copy()
+        try:
+            while not self.client._stopped.is_set():
+                # If the connect_loop returns STOP_CONNECTING, stop retrying
+                if retry(self._connect_loop, retry) is STOP_CONNECTING:
+                    break
+        except RetryFailedError:
+            self.logger.warning("Failed connecting to Zookeeper "
+                                "within the connection retry policy.")
+        finally:
+            self.connection_stopped.set()
+            self.client._session_callback(KeeperState.CLOSED)
+            self.logger.log(BLATHER, 'Connection stopped')
+
+    def _connect_loop(self, retry):
+        # Iterate through the hosts a full cycle before starting over
+        status = None
+        for host, port in self.client.hosts:
+            if self.client._stopped.is_set():
+                status = STOP_CONNECTING
+                break
+            status = self._connect_attempt(host, port, retry)
+            if status is STOP_CONNECTING:
+                break
+
+        if status is STOP_CONNECTING:
+            return STOP_CONNECTING
+        else:
+            raise ForceRetryError('Reconnecting')
+
+    def _connect_attempt(self, host, port, retry):
+        client = self.client
+        TimeoutError = self.handler.timeout_exception
+        close_connection = False
+
+        self._socket = None
+
+        # Were we given a r/w server? If so, use that instead
+        if self._rw_server:
+            self.logger.log(BLATHER,
+                            "Found r/w server to use, %s:%s", host, port)
+            host, port = self._rw_server
+            self._rw_server = None
+
+        if client._state != KeeperState.CONNECTING:
+            client._session_callback(KeeperState.CONNECTING)
+
+        try:
+            read_timeout, connect_timeout = self._connect(host, port)
+            read_timeout = read_timeout / 1000.0
+            connect_timeout = connect_timeout / 1000.0
+            retry.reset()
+            self._xid = 0
+
+            while not close_connection:
+                # Watch for something to read or send
+                jitter_time = random.randint(0, 40) / 100.0
+                # Ensure our timeout is positive
+                timeout = max([read_timeout / 2.0 - jitter_time, jitter_time])
+                s = self.handler.select([self._socket, self._read_pipe],
+                                        [], [], timeout)[0]
+
+                if not s:
+                    if self.ping_outstanding.is_set():
+                        self.ping_outstanding.clear()
+                        raise ConnectionDropped(
+                            "outstanding heartbeat ping not received")
+                    self._send_ping(connect_timeout)
+                elif s[0] == self._socket:
+                    response = self._read_socket(read_timeout)
+                    close_connection = response == CLOSE_RESPONSE
+                else:
+                    self._send_request(read_timeout, connect_timeout)
+
+            self.logger.info('Closing connection to %s:%s', host, port)
+            client._session_callback(KeeperState.CLOSED)
+            return STOP_CONNECTING
+        except (ConnectionDropped, TimeoutError) as e:
+            if isinstance(e, ConnectionDropped):
+                self.logger.warning('Connection dropped: %s', e)
+            else:
+                self.logger.warning('Connection time-out')
+            if client._state != KeeperState.CONNECTING:
+                self.logger.warning("Transition to CONNECTING")
+                client._session_callback(KeeperState.CONNECTING)
+        except AuthFailedError:
+            retry.reset()
+            self.logger.warning('AUTH_FAILED closing')
+            client._session_callback(KeeperState.AUTH_FAILED)
+            return STOP_CONNECTING
+        except SessionExpiredError:
+            retry.reset()
+            self.logger.warning('Session has expired')
+            client._session_callback(KeeperState.EXPIRED_SESSION)
+        except RWServerAvailable:
+            retry.reset()
+            self.logger.warning('Found a RW server, dropping connection')
+            client._session_callback(KeeperState.CONNECTING)
+        except Exception:
+            self.logger.exception('Unhandled exception in connection loop')
+            raise
+        finally:
+            if self._socket is not None:
+                self._socket.close()
+
+    def _connect(self, host, port):
+        client = self.client
+        self.logger.info('Connecting to %s:%s', host, port)
+
+        self.logger.log(BLATHER,
+                          '    Using session_id: %r session_passwd: %s',
+                          client._session_id,
+                          hexlify(client._session_passwd))
+
+        with self._socket_error_handling():
+            self._socket = self.handler.create_connection(
+                (host, port), client._session_timeout / 1000.0)
+
+        self._socket.setblocking(0)
+
+        connect = Connect(0, client.last_zxid, client._session_timeout,
+                          client._session_id or 0, client._session_passwd,
+                          client.read_only)
+
+        connect_result, zxid = self._invoke(client._session_timeout, connect)
+
+        if connect_result.time_out <= 0:
+            raise SessionExpiredError("Session has expired")
+
+        if zxid:
+            client.last_zxid = zxid
+
+        # Load return values
+        client._session_id = connect_result.session_id
+        client._protocol_version = connect_result.protocol_version
+        negotiated_session_timeout = connect_result.time_out
+        connect_timeout = negotiated_session_timeout / len(client.hosts)
+        read_timeout = negotiated_session_timeout * 2.0 / 3.0
+        client._session_passwd = connect_result.passwd
+
+        self.logger.log(BLATHER,
+                          'Session created, session_id: %r session_passwd: %s\n'
+                          '    negotiated session timeout: %s\n'
+                          '    connect timeout: %s\n'
+                          '    read timeout: %s', client._session_id,
+                          hexlify(client._session_passwd),
+                          negotiated_session_timeout, connect_timeout,
+                          read_timeout)
+
+        if connect_result.read_only:
+            client._session_callback(KeeperState.CONNECTED_RO)
+            self._ro_mode = iter(self._server_pinger())
+        else:
+            client._session_callback(KeeperState.CONNECTED)
+            self._ro_mode = None
+
+        for scheme, auth in client.auth_data:
+            ap = Auth(0, scheme, auth)
+            zxid = self._invoke(connect_timeout, ap, xid=AUTH_XID)
+            if zxid:
+                client.last_zxid = zxid
+        return read_timeout, connect_timeout
diff --git a/slider-agent/src/main/python/kazoo/protocol/paths.py b/slider-agent/src/main/python/kazoo/protocol/paths.py
new file mode 100644
index 0000000..e37c1a7
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/paths.py
@@ -0,0 +1,55 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+def normpath(path, trailing=False):
+    """Normalize path, eliminating double slashes, etc."""
+    comps = path.split('/')
+    new_comps = []
+    for comp in comps:
+        if comp == '':
+            continue
+        if comp in ('.', '..'):
+            raise ValueError('relative paths not allowed')
+        new_comps.append(comp)
+    new_path = '/'.join(new_comps)
+    if trailing is True and path.endswith('/'):
+        new_path += '/'
+    if path.startswith('/'):
+        return '/' + new_path
+    return new_path
+
+
+def join(a, *p):
+    """Join two or more pathname components, inserting '/' as needed.
+
+    If any component is an absolute path, all previous path components
+    will be discarded.
+
+    """
+    path = a
+    for b in p:
+        if b.startswith('/'):
+            path = b
+        elif path == '' or path.endswith('/'):
+            path += b
+        else:
+            path += '/' + b
+    return path
+
+
+def isabs(s):
+    """Test whether a path is absolute"""
+    return s.startswith('/')
+
+
+def basename(p):
+    """Returns the final component of a pathname"""
+    i = p.rfind('/') + 1
+    return p[i:]
+
+
+def _prefix_root(root, path, trailing=False):
+    """Prepend a root to a path. """
+    return normpath(join(_norm_root(root), path.lstrip('/')), trailing=trailing)
+
+
+def _norm_root(root):
+    return normpath(join('/', root))
diff --git a/slider-agent/src/main/python/kazoo/protocol/serialization.py b/slider-agent/src/main/python/kazoo/protocol/serialization.py
new file mode 100644
index 0000000..3b5df6c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/serialization.py
@@ -0,0 +1,397 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Serializers, Deserializers, and NamedTuple objects"""
+from collections import namedtuple
+import struct
+
+from kazoo.exceptions import EXCEPTIONS
+from kazoo.protocol.states import ZnodeStat
+from kazoo.security import ACL
+from kazoo.security import Id
+
+# Struct objects with formats compiled
+bool_struct = struct.Struct('B')
+int_struct = struct.Struct('!i')
+int_int_struct = struct.Struct('!ii')
+int_int_long_struct = struct.Struct('!iiq')
+
+int_long_int_long_struct = struct.Struct('!iqiq')
+multiheader_struct = struct.Struct('!iBi')
+reply_header_struct = struct.Struct('!iqi')
+stat_struct = struct.Struct('!qqqqiiiqiiq')
+
+try:  # pragma: nocover
+    basestring
+except NameError:
+    basestring = str
+
+
+def read_string(buffer, offset):
+    """Reads an int specified buffer into a string and returns the
+    string and the new offset in the buffer"""
+    length = int_struct.unpack_from(buffer, offset)[0]
+    offset += int_struct.size
+    if length < 0:
+        return None, offset
+    else:
+        index = offset
+        offset += length
+        return buffer[index:index + length].decode('utf-8'), offset
+
+
+def read_acl(bytes, offset):
+    perms = int_struct.unpack_from(bytes, offset)[0]
+    offset += int_struct.size
+    scheme, offset = read_string(bytes, offset)
+    id, offset = read_string(bytes, offset)
+    return ACL(perms, Id(scheme, id)), offset
+
+
+def write_string(bytes):
+    if not bytes:
+        return int_struct.pack(-1)
+    else:
+        utf8_str = bytes.encode('utf-8')
+        return int_struct.pack(len(utf8_str)) + utf8_str
+
+
+def write_buffer(bytes):
+    if bytes is None:
+        return int_struct.pack(-1)
+    else:
+        return int_struct.pack(len(bytes)) + bytes
+
+
+def read_buffer(bytes, offset):
+    length = int_struct.unpack_from(bytes, offset)[0]
+    offset += int_struct.size
+    if length < 0:
+        return None, offset
+    else:
+        index = offset
+        offset += length
+        return bytes[index:index + length], offset
+
+
+class Close(namedtuple('Close', '')):
+    type = -11
+
+    @classmethod
+    def serialize(cls):
+        return b''
+
+CloseInstance = Close()
+
+
+class Ping(namedtuple('Ping', '')):
+    type = 11
+
+    @classmethod
+    def serialize(cls):
+        return b''
+
+PingInstance = Ping()
+
+
+class Connect(namedtuple('Connect', 'protocol_version last_zxid_seen'
+                         ' time_out session_id passwd read_only')):
+    type = None
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(int_long_int_long_struct.pack(
+            self.protocol_version, self.last_zxid_seen, self.time_out,
+            self.session_id))
+        b.extend(write_buffer(self.passwd))
+        b.extend([1 if self.read_only else 0])
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        proto_version, timeout, session_id = int_int_long_struct.unpack_from(
+            bytes, offset)
+        offset += int_int_long_struct.size
+        password, offset = read_buffer(bytes, offset)
+
+        try:
+            read_only = bool_struct.unpack_from(bytes, offset)[0] == 1
+            offset += bool_struct.size
+        except struct.error:
+            read_only = False
+        return cls(proto_version, 0, timeout, session_id, password,
+                   read_only), offset
+
+
+class Create(namedtuple('Create', 'path data acl flags')):
+    type = 1
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(write_buffer(self.data))
+        b.extend(int_struct.pack(len(self.acl)))
+        for acl in self.acl:
+            b.extend(int_struct.pack(acl.perms) +
+                     write_string(acl.id.scheme) + write_string(acl.id.id))
+        b.extend(int_struct.pack(self.flags))
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        return read_string(bytes, offset)[0]
+
+
+class Delete(namedtuple('Delete', 'path version')):
+    type = 2
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(int_struct.pack(self.version))
+        return b
+
+    @classmethod
+    def deserialize(self, bytes, offset):
+        return True
+
+
+class Exists(namedtuple('Exists', 'path watcher')):
+    type = 3
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend([1 if self.watcher else 0])
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+        return stat if stat.czxid != -1 else None
+
+
+class GetData(namedtuple('GetData', 'path watcher')):
+    type = 4
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend([1 if self.watcher else 0])
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        data, offset = read_buffer(bytes, offset)
+        stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+        return data, stat
+
+
+class SetData(namedtuple('SetData', 'path data version')):
+    type = 5
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(write_buffer(self.data))
+        b.extend(int_struct.pack(self.version))
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        return ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+
+
+class GetACL(namedtuple('GetACL', 'path')):
+    type = 6
+
+    def serialize(self):
+        return bytearray(write_string(self.path))
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        count = int_struct.unpack_from(bytes, offset)[0]
+        offset += int_struct.size
+        if count == -1:  # pragma: nocover
+            return []
+
+        acls = []
+        for c in range(count):
+            acl, offset = read_acl(bytes, offset)
+            acls.append(acl)
+        stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+        return acls, stat
+
+
+class SetACL(namedtuple('SetACL', 'path acls version')):
+    type = 7
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(int_struct.pack(len(self.acls)))
+        for acl in self.acls:
+            b.extend(int_struct.pack(acl.perms) +
+                     write_string(acl.id.scheme) + write_string(acl.id.id))
+        b.extend(int_struct.pack(self.version))
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        return ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+
+
+class GetChildren(namedtuple('GetChildren', 'path watcher')):
+    type = 8
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend([1 if self.watcher else 0])
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        count = int_struct.unpack_from(bytes, offset)[0]
+        offset += int_struct.size
+        if count == -1:  # pragma: nocover
+            return []
+
+        children = []
+        for c in range(count):
+            child, offset = read_string(bytes, offset)
+            children.append(child)
+        return children
+
+
+class Sync(namedtuple('Sync', 'path')):
+    type = 9
+
+    def serialize(self):
+        return write_string(self.path)
+
+    @classmethod
+    def deserialize(cls, buffer, offset):
+        return read_string(buffer, offset)[0]
+
+
+class GetChildren2(namedtuple('GetChildren2', 'path watcher')):
+    type = 12
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend([1 if self.watcher else 0])
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        count = int_struct.unpack_from(bytes, offset)[0]
+        offset += int_struct.size
+        if count == -1:  # pragma: nocover
+            return []
+
+        children = []
+        for c in range(count):
+            child, offset = read_string(bytes, offset)
+            children.append(child)
+        stat = ZnodeStat._make(stat_struct.unpack_from(bytes, offset))
+        return children, stat
+
+
+class CheckVersion(namedtuple('CheckVersion', 'path version')):
+    type = 13
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(int_struct.pack(self.version))
+        return b
+
+
+class Transaction(namedtuple('Transaction', 'operations')):
+    type = 14
+
+    def serialize(self):
+        b = bytearray()
+        for op in self.operations:
+            b.extend(MultiHeader(op.type, False, -1).serialize() +
+                     op.serialize())
+        return b + multiheader_struct.pack(-1, True, -1)
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        header = MultiHeader(None, False, None)
+        results = []
+        response = None
+        while not header.done:
+            if header.type == Create.type:
+                response, offset = read_string(bytes, offset)
+            elif header.type == Delete.type:
+                response = True
+            elif header.type == SetData.type:
+                response = ZnodeStat._make(
+                    stat_struct.unpack_from(bytes, offset))
+                offset += stat_struct.size
+            elif header.type == CheckVersion.type:
+                response = True
+            elif header.type == -1:
+                err = int_struct.unpack_from(bytes, offset)[0]
+                offset += int_struct.size
+                response = EXCEPTIONS[err]()
+            if response:
+                results.append(response)
+            header, offset = MultiHeader.deserialize(bytes, offset)
+        return results
+
+    @staticmethod
+    def unchroot(client, response):
+        resp = []
+        for result in response:
+            if isinstance(result, basestring):
+                resp.append(client.unchroot(result))
+            else:
+                resp.append(result)
+        return resp
+
+
+class Auth(namedtuple('Auth', 'auth_type scheme auth')):
+    type = 100
+
+    def serialize(self):
+        return (int_struct.pack(self.auth_type) + write_string(self.scheme) +
+                write_string(self.auth))
+
+
+class Watch(namedtuple('Watch', 'type state path')):
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        """Given bytes and the current bytes offset, return the
+        type, state, path, and new offset"""
+        type, state = int_int_struct.unpack_from(bytes, offset)
+        offset += int_int_struct.size
+        path, offset = read_string(bytes, offset)
+        return cls(type, state, path), offset
+
+
+class ReplyHeader(namedtuple('ReplyHeader', 'xid, zxid, err')):
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        """Given bytes and the current bytes offset, return a
+        :class:`ReplyHeader` instance and the new offset"""
+        new_offset = offset + reply_header_struct.size
+        return cls._make(
+            reply_header_struct.unpack_from(bytes, offset)), new_offset
+
+
+class MultiHeader(namedtuple('MultiHeader', 'type done err')):
+    def serialize(self):
+        b = bytearray()
+        b.extend(int_struct.pack(self.type))
+        b.extend([1 if self.done else 0])
+        b.extend(int_struct.pack(self.err))
+        return b
+
+    @classmethod
+    def deserialize(cls, bytes, offset):
+        t, done, err = multiheader_struct.unpack_from(bytes, offset)
+        offset += multiheader_struct.size
+        return cls(t, done == 1, err), offset
diff --git a/slider-agent/src/main/python/kazoo/protocol/states.py b/slider-agent/src/main/python/kazoo/protocol/states.py
new file mode 100644
index 0000000..d09c2c6
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/protocol/states.py
@@ -0,0 +1,238 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo State and Event objects"""
+from collections import namedtuple
+
+
+class KazooState(object):
+    """High level connection state values
+
+    States inspired by Netflix Curator.
+
+    .. attribute:: SUSPENDED
+
+        The connection has been lost but may be recovered. We should
+        operate in a "safe mode" until then. When the connection is
+        resumed, it may be discovered that the session expired. A
+        client should not assume that locks are valid during this
+        time.
+
+    .. attribute:: CONNECTED
+
+        The connection is alive and well.
+
+    .. attribute:: LOST
+
+        The connection has been confirmed dead. Any ephemeral nodes
+        will need to be recreated upon re-establishing a connection.
+        If locks were acquired or recipes using ephemeral nodes are in
+        use, they can be considered lost as well.
+
+    """
+    SUSPENDED = "SUSPENDED"
+    CONNECTED = "CONNECTED"
+    LOST = "LOST"
+
+
+class KeeperState(object):
+    """Zookeeper State
+
+    Represents the Zookeeper state. Watch functions will receive a
+    :class:`KeeperState` attribute as their state argument.
+
+    .. attribute:: AUTH_FAILED
+
+        Authentication has failed, this is an unrecoverable error.
+
+    .. attribute:: CONNECTED
+
+        Zookeeper is connected.
+
+    .. attribute:: CONNECTED_RO
+
+        Zookeeper is connected in read-only state.
+
+    .. attribute:: CONNECTING
+
+        Zookeeper is currently attempting to establish a connection.
+
+    .. attribute:: EXPIRED_SESSION
+
+        The prior session was invalid, all prior ephemeral nodes are
+        gone.
+
+    """
+    AUTH_FAILED = 'AUTH_FAILED'
+    CONNECTED = 'CONNECTED'
+    CONNECTED_RO = 'CONNECTED_RO'
+    CONNECTING = 'CONNECTING'
+    CLOSED = 'CLOSED'
+    EXPIRED_SESSION = 'EXPIRED_SESSION'
+
+
+class EventType(object):
+    """Zookeeper Event
+
+    Represents a Zookeeper event. Events trigger watch functions which
+    will receive a :class:`EventType` attribute as their event
+    argument.
+
+    .. attribute:: CREATED
+
+        A node has been created.
+
+    .. attribute:: DELETED
+
+        A node has been deleted.
+
+    .. attribute:: CHANGED
+
+        The data for a node has changed.
+
+    .. attribute:: CHILD
+
+        The children under a node have changed (a child was added or
+        removed). This event does not indicate the data for a child
+        node has changed, which must have its own watch established.
+
+    """
+    CREATED = 'CREATED'
+    DELETED = 'DELETED'
+    CHANGED = 'CHANGED'
+    CHILD = 'CHILD'
+
+EVENT_TYPE_MAP = {
+    1: EventType.CREATED,
+    2: EventType.DELETED,
+    3: EventType.CHANGED,
+    4: EventType.CHILD
+}
+
+
+class WatchedEvent(namedtuple('WatchedEvent', ('type', 'state', 'path'))):
+    """A change on ZooKeeper that a Watcher is able to respond to.
+
+    The :class:`WatchedEvent` includes exactly what happened, the
+    current state of ZooKeeper, and the path of the node that was
+    involved in the event. An instance of :class:`WatchedEvent` will be
+    passed to registered watch functions.
+
+    .. attribute:: type
+
+        A :class:`EventType` attribute indicating the event type.
+
+    .. attribute:: state
+
+        A :class:`KeeperState` attribute indicating the Zookeeper
+        state.
+
+    .. attribute:: path
+
+        The path of the node for the watch event.
+
+    """
+
+
+class Callback(namedtuple('Callback', ('type', 'func', 'args'))):
+    """A callback that is handed to a handler for dispatch
+
+    :param type: Type of the callback, currently is only 'watch'
+    :param func: Callback function
+    :param args: Argument list for the callback function
+
+    """
+
+
+class ZnodeStat(namedtuple('ZnodeStat', 'czxid mzxid ctime mtime version'
+                           ' cversion aversion ephemeralOwner dataLength'
+                           ' numChildren pzxid')):
+    """A ZnodeStat structure with convenience properties
+
+    When getting the value of a node from Zookeeper, the properties for
+    the node known as a "Stat structure" will be retrieved. The
+    :class:`ZnodeStat` object provides access to the standard Stat
+    properties and additional properties that are more readable and use
+    Python time semantics (seconds since epoch instead of ms).
+
+    .. note::
+
+        The original Zookeeper Stat name is in parens next to the name
+        when it differs from the convenience attribute. These are **not
+        functions**, just attributes.
+
+    .. attribute:: creation_transaction_id (czxid)
+
+        The transaction id of the change that caused this znode to be
+        created.
+
+    .. attribute:: last_modified_transaction_id (mzxid)
+
+        The transaction id of the change that last modified this znode.
+
+    .. attribute:: created (ctime)
+
+        The time in seconds from epoch when this node was created.
+        (ctime is in milliseconds)
+
+    .. attribute:: last_modified (mtime)
+
+        The time in seconds from epoch when this znode was last
+        modified. (mtime is in milliseconds)
+
+    .. attribute:: version
+
+        The number of changes to the data of this znode.
+
+    .. attribute:: acl_version (aversion)
+
+        The number of changes to the ACL of this znode.
+
+    .. attribute:: owner_session_id (ephemeralOwner)
+
+        The session id of the owner of this znode if the znode is an
+        ephemeral node. If it is not an ephemeral node, it will be
+        `None`. (ephemeralOwner will be 0 if it is not ephemeral)
+
+    .. attribute:: data_length (dataLength)
+
+        The length of the data field of this znode.
+
+    .. attribute:: children_count (numChildren)
+
+        The number of children of this znode.
+
+    """
+    @property
+    def acl_version(self):
+        return self.aversion
+
+    @property
+    def children_version(self):
+        return self.cversion
+
+    @property
+    def created(self):
+        return self.ctime / 1000.0
+
+    @property
+    def last_modified(self):
+        return self.mtime / 1000.0
+
+    @property
+    def owner_session_id(self):
+        return self.ephemeralOwner or None
+
+    @property
+    def creation_transaction_id(self):
+        return self.czxid
+
+    @property
+    def last_modified_transaction_id(self):
+        return self.mzxid
+
+    @property
+    def data_length(self):
+        return self.dataLength
+
+    @property
+    def children_count(self):
+        return self.numChildren
diff --git a/slider-agent/src/main/python/kazoo/recipe/__init__.py b/slider-agent/src/main/python/kazoo/recipe/__init__.py
new file mode 100644
index 0000000..a7bacf3
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/__init__.py
@@ -0,0 +1,2 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
diff --git a/slider-agent/src/main/python/kazoo/recipe/barrier.py b/slider-agent/src/main/python/kazoo/recipe/barrier.py
new file mode 100644
index 0000000..729ac07
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/barrier.py
@@ -0,0 +1,215 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Barriers
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+import os
+import socket
+import uuid
+
+from kazoo.protocol.states import EventType
+from kazoo.exceptions import KazooException
+from kazoo.exceptions import NoNodeError
+from kazoo.exceptions import NodeExistsError
+
+
+class Barrier(object):
+    """Kazoo Barrier
+
+    Implements a barrier to block processing of a set of nodes until
+    a condition is met at which point the nodes will be allowed to
+    proceed. The barrier is in place if its node exists.
+
+    .. warning::
+
+        The :meth:`wait` function does not handle connection loss and
+        may raise :exc:`~kazoo.exceptions.ConnectionLossException` if
+        the connection is lost while waiting.
+
+    """
+    def __init__(self, client, path):
+        """Create a Kazoo Barrier
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The barrier path to use.
+
+        """
+        self.client = client
+        self.path = path
+
+    def create(self):
+        """Establish the barrier if it doesn't exist already"""
+        self.client.retry(self.client.ensure_path, self.path)
+
+    def remove(self):
+        """Remove the barrier
+
+        :returns: Whether the barrier actually needed to be removed.
+        :rtype: bool
+
+        """
+        try:
+            self.client.retry(self.client.delete, self.path)
+            return True
+        except NoNodeError:
+            return False
+
+    def wait(self, timeout=None):
+        """Wait on the barrier to be cleared
+
+        :returns: True if the barrier has been cleared, otherwise
+                  False.
+        :rtype: bool
+
+        """
+        cleared = self.client.handler.event_object()
+
+        def wait_for_clear(event):
+            if event.type == EventType.DELETED:
+                cleared.set()
+
+        exists = self.client.exists(self.path, watch=wait_for_clear)
+        if not exists:
+            return True
+
+        cleared.wait(timeout)
+        return cleared.is_set()
+
+
+class DoubleBarrier(object):
+    """Kazoo Double Barrier
+
+    Double barriers are used to synchronize the beginning and end of
+    a distributed task. The barrier blocks when entering it until all
+    the members have joined, and blocks when leaving until all the
+    members have left.
+
+    .. note::
+
+        You should register a listener for session loss as the process
+        will no longer be part of the barrier once the session is
+        gone. Connection losses will be retried with the default retry
+        policy.
+
+    """
+    def __init__(self, client, path, num_clients, identifier=None):
+        """Create a Double Barrier
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The barrier path to use.
+        :param num_clients: How many clients must enter the barrier to
+                            proceed.
+        :type num_clients: int
+        :param identifier: An identifier to use for this member of the
+                           barrier when participating. Defaults to the
+                           hostname + process id.
+
+        """
+        self.client = client
+        self.path = path
+        self.num_clients = num_clients
+        self._identifier = identifier or '%s-%s' % (
+            socket.getfqdn(), os.getpid())
+        self.participating = False
+        self.assured_path = False
+        self.node_name = uuid.uuid4().hex
+        self.create_path = self.path + "/" + self.node_name
+
+    def enter(self):
+        """Enter the barrier, blocks until all nodes have entered"""
+        try:
+            self.client.retry(self._inner_enter)
+            self.participating = True
+        except KazooException:
+            # We failed to enter, best effort cleanup
+            self._best_effort_cleanup()
+            self.participating = False
+
+    def _inner_enter(self):
+        # make sure our barrier parent node exists
+        if not self.assured_path:
+            self.client.ensure_path(self.path)
+            self.assured_path = True
+
+        ready = self.client.handler.event_object()
+
+        try:
+            self.client.create(self.create_path,
+                self._identifier.encode('utf-8'), ephemeral=True)
+        except NodeExistsError:
+            pass
+
+        def created(event):
+            if event.type == EventType.CREATED:
+                ready.set()
+
+        self.client.exists(self.path + '/' + 'ready', watch=created)
+
+        children = self.client.get_children(self.path)
+
+        if len(children) < self.num_clients:
+            ready.wait()
+        else:
+            self.client.ensure_path(self.path + '/ready')
+        return True
+
+    def leave(self):
+        """Leave the barrier, blocks until all nodes have left"""
+        try:
+            self.client.retry(self._inner_leave)
+        except KazooException:  # pragma: nocover
+            # Failed to cleanly leave
+            self._best_effort_cleanup()
+        self.participating = False
+
+    def _inner_leave(self):
+        # Delete the ready node if it's around
+        try:
+            self.client.delete(self.path + '/ready')
+        except NoNodeError:
+            pass
+
+        while True:
+            children = self.client.get_children(self.path)
+            if not children:
+                return True
+
+            if len(children) == 1 and children[0] == self.node_name:
+                self.client.delete(self.create_path)
+                return True
+
+            children.sort()
+
+            ready = self.client.handler.event_object()
+
+            def deleted(event):
+                if event.type == EventType.DELETED:
+                    ready.set()
+
+            if self.node_name == children[0]:
+                # We're first, wait on the highest to leave
+                if not self.client.exists(self.path + '/' + children[-1],
+                                          watch=deleted):
+                    continue
+
+                ready.wait()
+                continue
+
+            # Delete our node
+            self.client.delete(self.create_path)
+
+            # Wait on the first
+            if not self.client.exists(self.path + '/' + children[0],
+                                      watch=deleted):
+                continue
+
+            # Wait for the lowest to be deleted
+            ready.wait()
+
+    def _best_effort_cleanup(self):
+        try:
+            self.client.retry(self.client.delete, self.create_path)
+        except NoNodeError:
+            pass
diff --git a/slider-agent/src/main/python/kazoo/recipe/counter.py b/slider-agent/src/main/python/kazoo/recipe/counter.py
new file mode 100644
index 0000000..026d724
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/counter.py
@@ -0,0 +1,95 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Counter
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+
+from kazoo.exceptions import BadVersionError
+from kazoo.retry import ForceRetryError
+
+
+class Counter(object):
+    """Kazoo Counter
+
+    A shared counter of either int or float values. Changes to the
+    counter are done atomically. The general retry policy is used to
+    retry operations if concurrent changes are detected.
+
+    The data is marshaled using `repr(value)` and converted back using
+    `type(counter.default)(value)` both using an ascii encoding. As
+    such, other data types might be used for the counter value.
+
+    Counter changes can raise
+    :class:`~kazoo.exceptions.BadVersionError` if the retry policy
+    wasn't able to apply a change.
+
+    Example usage:
+
+    .. code-block:: python
+
+        zk = KazooClient()
+        counter = zk.Counter("/int")
+        counter += 2
+        counter -= 1
+        counter.value == 1
+
+        counter = zk.Counter("/float", default=1.0)
+        counter += 2.0
+        counter.value == 3.0
+
+    """
+    def __init__(self, client, path, default=0):
+        """Create a Kazoo Counter
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The counter path to use.
+        :param default: The default value.
+
+        """
+        self.client = client
+        self.path = path
+        self.default = default
+        self.default_type = type(default)
+        self._ensured_path = False
+
+    def _ensure_node(self):
+        if not self._ensured_path:
+            # make sure our node exists
+            self.client.ensure_path(self.path)
+            self._ensured_path = True
+
+    def _value(self):
+        self._ensure_node()
+        old, stat = self.client.get(self.path)
+        old = old.decode('ascii') if old != b'' else self.default
+        version = stat.version
+        data = self.default_type(old)
+        return data, version
+
+    @property
+    def value(self):
+        return self._value()[0]
+
+    def _change(self, value):
+        if not isinstance(value, self.default_type):
+            raise TypeError('invalid type for value change')
+        self.client.retry(self._inner_change, value)
+        return self
+
+    def _inner_change(self, value):
+        data, version = self._value()
+        data = repr(data + value).encode('ascii')
+        try:
+            self.client.set(self.path, data, version=version)
+        except BadVersionError:  # pragma: nocover
+            raise ForceRetryError()
+
+    def __add__(self, value):
+        """Add value to counter."""
+        return self._change(value)
+
+    def __sub__(self, value):
+        """Subtract value from counter."""
+        return self._change(-value)
diff --git a/slider-agent/src/main/python/kazoo/recipe/election.py b/slider-agent/src/main/python/kazoo/recipe/election.py
new file mode 100644
index 0000000..5349e26
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/election.py
@@ -0,0 +1,80 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""ZooKeeper Leader Elections
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+from kazoo.exceptions import CancelledError
+
+
+class Election(object):
+    """Kazoo Basic Leader Election
+
+    Example usage with a :class:`~kazoo.client.KazooClient` instance::
+
+        zk = KazooClient()
+        election = zk.Election("/electionpath", "my-identifier")
+
+        # blocks until the election is won, then calls
+        # my_leader_function()
+        election.run(my_leader_function)
+
+    """
+    def __init__(self, client, path, identifier=None):
+        """Create a Kazoo Leader Election
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The election path to use.
+        :param identifier: Name to use for this lock contender. This
+                           can be useful for querying to see who the
+                           current lock contenders are.
+
+        """
+        self.lock = client.Lock(path, identifier)
+
+    def run(self, func, *args, **kwargs):
+        """Contend for the leadership
+
+        This call will block until either this contender is cancelled
+        or this contender wins the election and the provided leadership
+        function subsequently returns or fails.
+
+        :param func: A function to be called if/when the election is
+                     won.
+        :param args: Arguments to leadership function.
+        :param kwargs: Keyword arguments to leadership function.
+
+        """
+        if not callable(func):
+            raise ValueError("leader function is not callable")
+
+        try:
+            with self.lock:
+                func(*args, **kwargs)
+
+        except CancelledError:
+            pass
+
+    def cancel(self):
+        """Cancel participation in the election
+
+        .. note::
+
+            If this contender has already been elected leader, this
+            method will not interrupt the leadership function.
+
+        """
+        self.lock.cancel()
+
+    def contenders(self):
+        """Return an ordered list of the current contenders in the
+        election
+
+        .. note::
+
+            If the contenders did not set an identifier, it will appear
+            as a blank string.
+
+        """
+        return self.lock.contenders()
diff --git a/slider-agent/src/main/python/kazoo/recipe/lock.py b/slider-agent/src/main/python/kazoo/recipe/lock.py
new file mode 100644
index 0000000..8f14847
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/lock.py
@@ -0,0 +1,521 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Locking Implementations
+
+:Maintainer: None
+:Status: Unknown
+
+Error Handling
+==============
+
+It's highly recommended to add a state listener with
+:meth:`~KazooClient.add_listener` and watch for
+:attr:`~KazooState.LOST` and :attr:`~KazooState.SUSPENDED` state
+changes and react appropriately. In the event that a
+:attr:`~KazooState.LOST` state occurs, it's certain that the lock
+and/or the lease has been lost.
+
+"""
+import uuid
+
+from kazoo.retry import (
+    KazooRetry,
+    RetryFailedError,
+    ForceRetryError
+)
+from kazoo.exceptions import CancelledError
+from kazoo.exceptions import KazooException
+from kazoo.exceptions import LockTimeout
+from kazoo.exceptions import NoNodeError
+from kazoo.protocol.states import KazooState
+
+
+class Lock(object):
+    """Kazoo Lock
+
+    Example usage with a :class:`~kazoo.client.KazooClient` instance:
+
+    .. code-block:: python
+
+        zk = KazooClient()
+        lock = zk.Lock("/lockpath", "my-identifier")
+        with lock:  # blocks waiting for lock acquisition
+            # do something with the lock
+
+    Note: This lock is re-entrant. Repeat calls after acquired will
+    continue to return ``True``.
+
+    """
+    _NODE_NAME = '__lock__'
+
+    def __init__(self, client, path, identifier=None):
+        """Create a Kazoo lock.
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The lock path to use.
+        :param identifier: Name to use for this lock contender. This
+                           can be useful for querying to see who the
+                           current lock contenders are.
+
+        """
+        self.client = client
+        self.path = path
+
+        # some data is written to the node. this can be queried via
+        # contenders() to see who is contending for the lock
+        self.data = str(identifier or "").encode('utf-8')
+
+        self.wake_event = client.handler.event_object()
+
+        # props to Netflix Curator for this trick. It is possible for our
+        # create request to succeed on the server, but for a failure to
+        # prevent us from getting back the full path name. We prefix our
+        # lock name with a uuid and can check for its presence on retry.
+        self.prefix = uuid.uuid4().hex + self._NODE_NAME
+        self.create_path = self.path + "/" + self.prefix
+
+        self.create_tried = False
+        self.is_acquired = False
+        self.assured_path = False
+        self.cancelled = False
+        self._retry = KazooRetry(max_tries=None,
+                                 sleep_func=client.handler.sleep_func)
+
+    def _ensure_path(self):
+        self.client.ensure_path(self.path)
+        self.assured_path = True
+
+    def cancel(self):
+        """Cancel a pending lock acquire."""
+        self.cancelled = True
+        self.wake_event.set()
+
+    def acquire(self, blocking=True, timeout=None):
+        """
+        Acquire the lock. By default, blocks and waits forever.
+
+        :param blocking: Block until lock is obtained or return immediately.
+        :type blocking: bool
+        :param timeout: Don't wait forever to acquire the lock.
+        :type timeout: float or None
+
+        :returns: Was the lock acquired?
+        :rtype: bool
+
+        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
+                 wasn't acquired within `timeout` seconds.
+
+        .. versionadded:: 1.1
+            The timeout option.
+        """
+        try:
+            retry = self._retry.copy()
+            retry.deadline = timeout
+            self.is_acquired = retry(self._inner_acquire,
+                blocking=blocking, timeout=timeout)
+        except RetryFailedError:
+            self._best_effort_cleanup()
+        except KazooException:
+            # if we did ultimately fail, attempt to clean up
+            self._best_effort_cleanup()
+            self.cancelled = False
+            raise
+
+        if not self.is_acquired:
+            self._delete_node(self.node)
+
+        return self.is_acquired
+
+    def _watch_session(self, state):
+        self.wake_event.set()
+        return True
+
+    def _inner_acquire(self, blocking, timeout):
+        # make sure our election parent node exists
+        if not self.assured_path:
+            self._ensure_path()
+
+        node = None
+        if self.create_tried:
+            node = self._find_node()
+        else:
+            self.create_tried = True
+
+        if not node:
+            node = self.client.create(self.create_path, self.data,
+                                      ephemeral=True, sequence=True)
+            # strip off path to node
+            node = node[len(self.path) + 1:]
+
+        self.node = node
+
+        while True:
+            self.wake_event.clear()
+
+            # bail out with an exception if cancellation has been requested
+            if self.cancelled:
+                raise CancelledError()
+
+            children = self._get_sorted_children()
+
+            try:
+                our_index = children.index(node)
+            except ValueError:  # pragma: nocover
+                # somehow we aren't in the children -- probably we are
+                # recovering from a session failure and our ephemeral
+                # node was removed
+                raise ForceRetryError()
+
+            if self.acquired_lock(children, our_index):
+                return True
+
+            if not blocking:
+                return False
+
+            # otherwise we are in the mix. watch predecessor and bide our time
+            predecessor = self.path + "/" + children[our_index - 1]
+            self.client.add_listener(self._watch_session)
+            try:
+                if self.client.exists(predecessor, self._watch_predecessor):
+                    self.wake_event.wait(timeout)
+                    if not self.wake_event.isSet():
+                        raise LockTimeout("Failed to acquire lock on %s after %s "
+                                          "seconds" % (self.path, timeout))
+            finally:
+                self.client.remove_listener(self._watch_session)
+
+    def acquired_lock(self, children, index):
+        return index == 0
+
+    def _watch_predecessor(self, event):
+        self.wake_event.set()
+
+    def _get_sorted_children(self):
+        children = self.client.get_children(self.path)
+
+        # can't just sort directly: the node names are prefixed by uuids
+        lockname = self._NODE_NAME
+        children.sort(key=lambda c: c[c.find(lockname) + len(lockname):])
+        return children
+
+    def _find_node(self):
+        children = self.client.get_children(self.path)
+        for child in children:
+            if child.startswith(self.prefix):
+                return child
+        return None
+
+    def _delete_node(self, node):
+        self.client.delete(self.path + "/" + node)
+
+    def _best_effort_cleanup(self):
+        try:
+            node = self._find_node()
+            if node:
+                self._delete_node(node)
+        except KazooException:  # pragma: nocover
+            pass
+
+    def release(self):
+        """Release the lock immediately."""
+        return self.client.retry(self._inner_release)
+
+    def _inner_release(self):
+        if not self.is_acquired:
+            return False
+
+        try:
+            self._delete_node(self.node)
+        except NoNodeError:  # pragma: nocover
+            pass
+
+        self.is_acquired = False
+        self.node = None
+
+        return True
+
+    def contenders(self):
+        """Return an ordered list of the current contenders for the
+        lock.
+
+        .. note::
+
+            If the contenders did not set an identifier, it will appear
+            as a blank string.
+
+        """
+        # make sure our election parent node exists
+        if not self.assured_path:
+            self._ensure_path()
+
+        children = self._get_sorted_children()
+
+        contenders = []
+        for child in children:
+            try:
+                data, stat = self.client.get(self.path + "/" + child)
+                contenders.append(data.decode('utf-8'))
+            except NoNodeError:  # pragma: nocover
+                pass
+        return contenders
+
+    def __enter__(self):
+        self.acquire()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.release()
+
+
+class Semaphore(object):
+    """A Zookeeper-based Semaphore
+
+    This synchronization primitive operates in the same manner as the
+    Python threading version only uses the concept of leases to
+    indicate how many available leases are available for the lock
+    rather than counting.
+
+    Example:
+
+    .. code-block:: python
+
+        zk = KazooClient()
+        semaphore = zk.Semaphore("/leasepath", "my-identifier")
+        with semaphore:  # blocks waiting for lock acquisition
+            # do something with the semaphore
+
+    .. warning::
+
+        This class stores the allowed max_leases as the data on the
+        top-level semaphore node. The stored value is checked once
+        against the max_leases of each instance. This check is
+        performed when acquire is called the first time. The semaphore
+        node needs to be deleted to change the allowed leases.
+
+    .. versionadded:: 0.6
+        The Semaphore class.
+
+    .. versionadded:: 1.1
+        The max_leases check.
+
+    """
+    def __init__(self, client, path, identifier=None, max_leases=1):
+        """Create a Kazoo Lock
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The semaphore path to use.
+        :param identifier: Name to use for this lock contender. This
+                           can be useful for querying to see who the
+                           current lock contenders are.
+        :param max_leases: The maximum amount of leases available for
+                           the semaphore.
+
+        """
+        # Implementation notes about how excessive thundering herd
+        # and watches are avoided
+        # - A node (lease pool) holds children for each lease in use
+        # - A lock is acquired for a process attempting to acquire a
+        #   lease. If a lease is available, the ephemeral node is
+        #   created in the lease pool and the lock is released.
+        # - Only the lock holder watches for children changes in the
+        #   lease pool
+        self.client = client
+        self.path = path
+
+        # some data is written to the node. this can be queried via
+        # contenders() to see who is contending for the lock
+        self.data = str(identifier or "").encode('utf-8')
+        self.max_leases = max_leases
+        self.wake_event = client.handler.event_object()
+
+        self.create_path = self.path + "/" + uuid.uuid4().hex
+        self.lock_path = path + '-' + '__lock__'
+        self.is_acquired = False
+        self.assured_path = False
+        self.cancelled = False
+        self._session_expired = False
+
+    def _ensure_path(self):
+        result = self.client.ensure_path(self.path)
+        self.assured_path = True
+        if result is True:
+            # node did already exist
+            data, _ = self.client.get(self.path)
+            try:
+                leases = int(data.decode('utf-8'))
+            except (ValueError, TypeError):
+                # ignore non-numeric data, maybe the node data is used
+                # for other purposes
+                pass
+            else:
+                if leases != self.max_leases:
+                    raise ValueError(
+                        "Inconsistent max leases: %s, expected: %s" %
+                        (leases, self.max_leases)
+                    )
+        else:
+            self.client.set(self.path, str(self.max_leases).encode('utf-8'))
+
+    def cancel(self):
+        """Cancel a pending semaphore acquire."""
+        self.cancelled = True
+        self.wake_event.set()
+
+    def acquire(self, blocking=True, timeout=None):
+        """Acquire the semaphore. By default, blocks and waits forever.
+
+        :param blocking: Block until semaphore is obtained or
+                         return immediately.
+        :type blocking: bool
+        :param timeout: Don't wait forever to acquire the semaphore.
+        :type timeout: float or None
+
+        :returns: Was the semaphore acquired?
+        :rtype: bool
+
+        :raises:
+            ValueError if the max_leases value doesn't match the
+            stored value.
+
+            :exc:`~kazoo.exceptions.LockTimeout` if the semaphore
+            wasn't acquired within `timeout` seconds.
+
+        .. versionadded:: 1.1
+            The blocking, timeout arguments and the max_leases check.
+        """
+        # If the semaphore had previously been canceled, make sure to
+        # reset that state.
+        self.cancelled = False
+
+        try:
+            self.is_acquired = self.client.retry(
+                self._inner_acquire, blocking=blocking, timeout=timeout)
+        except KazooException:
+            # if we did ultimately fail, attempt to clean up
+            self._best_effort_cleanup()
+            self.cancelled = False
+            raise
+
+        return self.is_acquired
+
+    def _inner_acquire(self, blocking, timeout=None):
+        """Inner loop that runs from the top anytime a command hits a
+        retryable Zookeeper exception."""
+        self._session_expired = False
+        self.client.add_listener(self._watch_session)
+
+        if not self.assured_path:
+            self._ensure_path()
+
+        # Do we already have a lease?
+        if self.client.exists(self.create_path):
+            return True
+
+        with self.client.Lock(self.lock_path, self.data):
+            while True:
+                self.wake_event.clear()
+
+                # Attempt to grab our lease...
+                if self._get_lease():
+                    return True
+
+                if blocking:
+                    # If blocking, wait until self._watch_lease_change() is
+                    # called before returning
+                    self.wake_event.wait(timeout)
+                    if not self.wake_event.isSet():
+                        raise LockTimeout(
+                            "Failed to acquire semaphore on %s "
+                            "after %s seconds" % (self.path, timeout))
+                else:
+                    # If not blocking, register another watch that will trigger
+                    # self._get_lease() as soon as the children change again.
+                    self.client.get_children(self.path, self._get_lease)
+                    return False
+
+    def _watch_lease_change(self, event):
+        self.wake_event.set()
+
+    def _get_lease(self, data=None):
+        # Make sure the session is still valid
+        if self._session_expired:
+            raise ForceRetryError("Retry on session loss at top")
+
+        # Make sure that the request hasn't been canceled
+        if self.cancelled:
+            raise CancelledError("Semaphore cancelled")
+
+        # Get a list of the current potential lock holders. If they change,
+        # notify our wake_event object. This is used to unblock a blocking
+        # self._inner_acquire call.
+        children = self.client.get_children(self.path,
+                                            self._watch_lease_change)
+
+        # If there are leases available, acquire one
+        if len(children) < self.max_leases:
+            self.client.create(self.create_path, self.data, ephemeral=True)
+
+        # Check if our acquisition was successful or not. Update our state.
+        if self.client.exists(self.create_path):
+            self.is_acquired = True
+        else:
+            self.is_acquired = False
+
+        # Return current state
+        return self.is_acquired
+
+    def _watch_session(self, state):
+        if state == KazooState.LOST:
+            self._session_expired = True
+            self.wake_event.set()
+
+            # Return true to de-register
+            return True
+
+    def _best_effort_cleanup(self):
+        try:
+            self.client.delete(self.create_path)
+        except KazooException:  # pragma: nocover
+            pass
+
+    def release(self):
+        """Release the lease immediately."""
+        return self.client.retry(self._inner_release)
+
+    def _inner_release(self):
+        if not self.is_acquired:
+            return False
+
+        try:
+            self.client.delete(self.create_path)
+        except NoNodeError:  # pragma: nocover
+            pass
+        self.is_acquired = False
+        return True
+
+    def lease_holders(self):
+        """Return an unordered list of the current lease holders.
+
+        .. note::
+
+            If the lease holder did not set an identifier, it will
+            appear as a blank string.
+
+        """
+        if not self.client.exists(self.path):
+            return []
+
+        children = self.client.get_children(self.path)
+
+        lease_holders = []
+        for child in children:
+            try:
+                data, stat = self.client.get(self.path + "/" + child)
+                lease_holders.append(data.decode('utf-8'))
+            except NoNodeError:  # pragma: nocover
+                pass
+        return lease_holders
+
+    def __enter__(self):
+        self.acquire()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.release()
diff --git a/slider-agent/src/main/python/kazoo/recipe/partitioner.py b/slider-agent/src/main/python/kazoo/recipe/partitioner.py
new file mode 100644
index 0000000..0d92661
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/partitioner.py
@@ -0,0 +1,378 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper Partitioner Implementation
+
+:Maintainer: None
+:Status: Unknown
+
+:class:`SetPartitioner` implements a partitioning scheme using
+Zookeeper for dividing up resources amongst members of a party.
+
+This is useful when there is a set of resources that should only be
+accessed by a single process at a time that multiple processes
+across a cluster might want to divide up.
+
+Example Use-Case
+----------------
+
+- Multiple workers across a cluster need to divide up a list of queues
+  so that no two workers own the same queue.
+
+"""
+import logging
+import os
+import socket
+from functools import partial
+
+from kazoo.exceptions import KazooException
+from kazoo.protocol.states import KazooState
+from kazoo.recipe.watchers import PatientChildrenWatch
+
+log = logging.getLogger(__name__)
+
+
+class PartitionState(object):
+    """High level partition state values
+
+    .. attribute:: ALLOCATING
+
+        The set needs to be partitioned, and may require an existing
+        partition set to be released before acquiring a new partition
+        of the set.
+
+    .. attribute:: ACQUIRED
+
+        The set has been partitioned and acquired.
+
+    .. attribute:: RELEASE
+
+        The set needs to be repartitioned, and the current partitions
+        must be released before a new allocation can be made.
+
+    .. attribute:: FAILURE
+
+        The set partition has failed. This occurs when the maximum
+        time to partition the set is exceeded or the Zookeeper session
+        is lost. The partitioner is unusable after this state and must
+        be recreated.
+
+    """
+    ALLOCATING = "ALLOCATING"
+    ACQUIRED = "ACQUIRED"
+    RELEASE = "RELEASE"
+    FAILURE = "FAILURE"
+
+
+class SetPartitioner(object):
+    """Partitions a set amongst members of a party
+
+    This class will partition a set amongst members of a party such
+    that each member will be given zero or more items of the set and
+    each set item will be given to a single member. When new members
+    enter or leave the party, the set will be re-partitioned amongst
+    the members.
+
+    When the :class:`SetPartitioner` enters the
+    :attr:`~PartitionState.FAILURE` state, it is unrecoverable
+    and a new :class:`SetPartitioner` should be created.
+
+    Example:
+
+    .. code-block:: python
+
+        from kazoo.client import KazooClient
+        client = KazooClient()
+
+        qp = client.SetPartitioner(
+            path='/work_queues', set=('queue-1', 'queue-2', 'queue-3'))
+
+        while 1:
+            if qp.failed:
+                raise Exception("Lost or unable to acquire partition")
+            elif qp.release:
+                qp.release_set()
+            elif qp.acquired:
+                for partition in qp:
+                    # Do something with each partition
+            elif qp.allocating:
+                qp.wait_for_acquire()
+
+    **State Transitions**
+
+    When created, the :class:`SetPartitioner` enters the
+    :attr:`PartitionState.ALLOCATING` state.
+
+    :attr:`~PartitionState.ALLOCATING` ->
+    :attr:`~PartitionState.ACQUIRED`
+
+        Set was partitioned successfully, the partition list assigned
+        is accessible via list/iter methods or calling list() on the
+        :class:`SetPartitioner` instance.
+
+    :attr:`~PartitionState.ALLOCATING` ->
+    :attr:`~PartitionState.FAILURE`
+
+        Allocating the set failed either due to a Zookeeper session
+        expiration, or failure to acquire the items of the set within
+        the timeout period.
+
+    :attr:`~PartitionState.ACQUIRED` ->
+    :attr:`~PartitionState.RELEASE`
+
+        The members of the party have changed, and the set needs to be
+        repartitioned. :meth:`SetPartitioner.release` should be called
+        as soon as possible.
+
+    :attr:`~PartitionState.ACQUIRED` ->
+    :attr:`~PartitionState.FAILURE`
+
+        The current partition was lost due to a Zookeeper session
+        expiration.
+
+    :attr:`~PartitionState.RELEASE` ->
+    :attr:`~PartitionState.ALLOCATING`
+
+        The current partition was released and is being re-allocated.
+
+    """
+    def __init__(self, client, path, set, partition_func=None,
+                 identifier=None, time_boundary=30):
+        """Create a :class:`~SetPartitioner` instance
+
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The partition path to use.
+        :param set: The set of items to partition.
+        :param partition_func: A function to use to decide how to
+                               partition the set.
+        :param identifier: An identifier to use for this member of the
+                           party when participating. Defaults to the
+                           hostname + process id.
+        :param time_boundary: How long the party members must be stable
+                              before allocation can complete.
+
+        """
+        self.state = PartitionState.ALLOCATING
+
+        self._client = client
+        self._path = path
+        self._set = set
+        self._partition_set = []
+        self._partition_func = partition_func or self._partitioner
+        self._identifier = identifier or '%s-%s' % (
+            socket.getfqdn(), os.getpid())
+        self._locks = []
+        self._lock_path = '/'.join([path, 'locks'])
+        self._party_path = '/'.join([path, 'party'])
+        self._time_boundary = time_boundary
+
+        self._acquire_event = client.handler.event_object()
+
+        # Create basic path nodes
+        client.ensure_path(path)
+        client.ensure_path(self._lock_path)
+        client.ensure_path(self._party_path)
+
+        # Join the party
+        self._party = client.ShallowParty(self._party_path,
+                                          identifier=self._identifier)
+        self._party.join()
+
+        self._was_allocated = False
+        self._state_change = client.handler.rlock_object()
+        client.add_listener(self._establish_sessionwatch)
+
+        # Now watch the party and set the callback on the async result
+        # so we know when we're ready
+        self._children_updated = False
+        self._child_watching(self._allocate_transition, async=True)
+
+    def __iter__(self):
+        """Return the partitions in this partition set"""
+        for partition in self._partition_set:
+            yield partition
+
+    @property
+    def failed(self):
+        """Corresponds to the :attr:`PartitionState.FAILURE` state"""
+        return self.state == PartitionState.FAILURE
+
+    @property
+    def release(self):
+        """Corresponds to the :attr:`PartitionState.RELEASE` state"""
+        return self.state == PartitionState.RELEASE
+
+    @property
+    def allocating(self):
+        """Corresponds to the :attr:`PartitionState.ALLOCATING`
+        state"""
+        return self.state == PartitionState.ALLOCATING
+
+    @property
+    def acquired(self):
+        """Corresponds to the :attr:`PartitionState.ACQUIRED` state"""
+        return self.state == PartitionState.ACQUIRED
+
+    def wait_for_acquire(self, timeout=30):
+        """Wait for the set to be partitioned and acquired
+
+        :param timeout: How long to wait before returning.
+        :type timeout: int
+
+        """
+        self._acquire_event.wait(timeout)
+
+    def release_set(self):
+        """Call to release the set
+
+        This method begins the step of allocating once the set has
+        been released.
+
+        """
+        self._release_locks()
+        if self._locks:  # pragma: nocover
+            # This shouldn't happen, it means we couldn't release our
+            # locks, abort
+            self._fail_out()
+            return
+        else:
+            with self._state_change:
+                if self.failed:
+                    return
+                self.state = PartitionState.ALLOCATING
+        self._child_watching(self._allocate_transition, async=True)
+
+    def finish(self):
+        """Call to release the set and leave the party"""
+        self._release_locks()
+        self._fail_out()
+
+    def _fail_out(self):
+        with self._state_change:
+            self.state = PartitionState.FAILURE
+        if self._party.participating:
+            try:
+                self._party.leave()
+            except KazooException:  # pragma: nocover
+                pass
+
+    def _allocate_transition(self, result):
+        """Called when in allocating mode, and the children settled"""
+        # Did we get an exception waiting for children to settle?
+        if result.exception:  # pragma: nocover
+            self._fail_out()
+            return
+
+        children, async_result = result.get()
+        self._children_updated = False
+
+        # Add a callback when children change on the async_result
+        def updated(result):
+            with self._state_change:
+                if self.acquired:
+                    self.state = PartitionState.RELEASE
+            self._children_updated = True
+
+        async_result.rawlink(updated)
+
+        # Split up the set
+        self._partition_set = self._partition_func(
+            self._identifier, list(self._party), self._set)
+
+        # Proceed to acquire locks for the working set as needed
+        for member in self._partition_set:
+            if self._children_updated or self.failed:
+                # Still haven't settled down, release locks acquired
+                # so far and go back
+                return self._abort_lock_acquisition()
+
+            lock = self._client.Lock(self._lock_path + '/' +
+                                     str(member))
+            try:
+                lock.acquire()
+            except KazooException:  # pragma: nocover
+                return self.finish()
+            self._locks.append(lock)
+
+        # All locks acquired! Time for state transition, make sure
+        # we didn't inadvertently get lost thus far
+        with self._state_change:
+            if self.failed:  # pragma: nocover
+                return self.finish()
+            self.state = PartitionState.ACQUIRED
+            self._acquire_event.set()
+
+    def _release_locks(self):
+        """Attempt to completely remove all the locks"""
+        self._acquire_event.clear()
+        for lock in self._locks[:]:
+            try:
+                lock.release()
+            except KazooException:  # pragma: nocover
+                # We proceed to remove as many as possible, and leave
+                # the ones we couldn't remove
+                pass
+            else:
+                self._locks.remove(lock)
+
+    def _abort_lock_acquisition(self):
+        """Called during lock acquisition if a party change occurs"""
+        self._partition_set = []
+        self._release_locks()
+        if self._locks:
+            # This shouldn't happen, it means we couldn't release our
+            # locks, abort
+            self._fail_out()
+            return
+        return self._child_watching(self._allocate_transition)
+
+    def _child_watching(self, func=None, async=False):
+        """Called when children are being watched to stabilize
+
+        This actually returns immediately, child watcher spins up a
+        new thread/greenlet and waits for it to stabilize before
+        any callbacks might run.
+
+        """
+        watcher = PatientChildrenWatch(self._client, self._party_path,
+                                       self._time_boundary)
+        asy = watcher.start()
+        if func is not None:
+            # We spin up the function in a separate thread/greenlet
+            # to ensure that any rawlinks it might use won't be
+            # blocked
+            if async:
+                func = partial(self._client.handler.spawn, func)
+            asy.rawlink(func)
+        return asy
+
+    def _establish_sessionwatch(self, state):
+        """Register ourselves to listen for session events; we shut
+        down if we become lost"""
+        with self._state_change:
+            # Handle network partition: if the connection gets suspended,
+            # change state to ALLOCATING if we had already ACQUIRED. This way
+            # the caller does not process the members, since we could eventually
+            # lose the session and get repartitioned. If we reconnect after a
+            # suspension it means we have not lost the session and still have
+            # our members. Hence, restore to ACQUIRED.
+            if state == KazooState.SUSPENDED:
+                if self.state == PartitionState.ACQUIRED:
+                    self._was_allocated = True
+                    self.state = PartitionState.ALLOCATING
+            elif state == KazooState.CONNECTED:
+                if self._was_allocated:
+                    self._was_allocated = False
+                    self.state = PartitionState.ACQUIRED
+
+        if state == KazooState.LOST:
+            self._client.handler.spawn(self._fail_out)
+            return True
+
+    def _partitioner(self, identifier, members, partitions):
+        # Ensure consistent order of partitions/members
+        all_partitions = sorted(partitions)
+        workers = sorted(members)
+
+        i = workers.index(identifier)
+        # Now return the partition list starting at our location and
+        # skipping the other workers
+        return all_partitions[i::len(workers)]
diff --git a/slider-agent/src/main/python/kazoo/recipe/party.py b/slider-agent/src/main/python/kazoo/recipe/party.py
new file mode 100644
index 0000000..4fd873e
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/party.py
@@ -0,0 +1,119 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Party
+
+:Maintainer: Ben Bangert <ben@groovie.org>
+:Status: Production
+
+A Zookeeper pool of party members. The :class:`Party` object can be
+used for determining members of a party.
+
+"""
+import uuid
+
+from kazoo.exceptions import NodeExistsError, NoNodeError
+
+
+class BaseParty(object):
+    """Base implementation of a party."""
+    def __init__(self, client, path, identifier=None):
+        """
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The party path to use.
+        :param identifier: An identifier to use for this member of the
+                           party when participating.
+
+        """
+        self.client = client
+        self.path = path
+        self.data = str(identifier or "").encode('utf-8')
+        self.ensured_path = False
+        self.participating = False
+
+    def _ensure_parent(self):
+        if not self.ensured_path:
+            # make sure our parent node exists
+            self.client.ensure_path(self.path)
+            self.ensured_path = True
+
+    def join(self):
+        """Join the party"""
+        return self.client.retry(self._inner_join)
+
+    def _inner_join(self):
+        self._ensure_parent()
+        try:
+            self.client.create(self.create_path, self.data, ephemeral=True)
+            self.participating = True
+        except NodeExistsError:
+            # node was already created, perhaps we are recovering from a
+            # suspended connection
+            self.participating = True
+
+    def leave(self):
+        """Leave the party"""
+        self.participating = False
+        return self.client.retry(self._inner_leave)
+
+    def _inner_leave(self):
+        try:
+            self.client.delete(self.create_path)
+        except NoNodeError:
+            return False
+        return True
+
+    def __len__(self):
+        """Return a count of participating clients"""
+        self._ensure_parent()
+        return len(self._get_children())
+
+    def _get_children(self):
+        return self.client.retry(self.client.get_children, self.path)
+
+
+class Party(BaseParty):
+    """Simple pool of participating processes"""
+    _NODE_NAME = "__party__"
+
+    def __init__(self, client, path, identifier=None):
+        BaseParty.__init__(self, client, path, identifier=identifier)
+        self.node = uuid.uuid4().hex + self._NODE_NAME
+        self.create_path = self.path + "/" + self.node
+
+    def __iter__(self):
+        """Get a list of participating clients' data values"""
+        self._ensure_parent()
+        children = self._get_children()
+        for child in children:
+            try:
+                d, _ = self.client.retry(self.client.get, self.path +
+                                         "/" + child)
+                yield d.decode('utf-8')
+            except NoNodeError:  # pragma: nocover
+                pass
+
+    def _get_children(self):
+        children = BaseParty._get_children(self)
+        return [c for c in children if self._NODE_NAME in c]
+
+
+class ShallowParty(BaseParty):
+    """Simple shallow pool of participating processes
+
+    This differs from the :class:`Party` as the identifier is used in
+    the name of the party node itself, rather than the data. This
+    places some restrictions on the length as it must be a valid
+    Zookeeper node (an alphanumeric string), but reduces the overhead
+    of getting a list of participants to a single Zookeeper call.
+
+    """
+    def __init__(self, client, path, identifier=None):
+        BaseParty.__init__(self, client, path, identifier=identifier)
+        self.node = '-'.join([uuid.uuid4().hex, self.data.decode('utf-8')])
+        self.create_path = self.path + "/" + self.node
+
+    def __iter__(self):
+        """Get a list of participating clients' identifiers"""
+        self._ensure_parent()
+        children = self._get_children()
+        for child in children:
+            yield child[child.find('-') + 1:]
diff --git a/slider-agent/src/main/python/kazoo/recipe/queue.py b/slider-agent/src/main/python/kazoo/recipe/queue.py
new file mode 100644
index 0000000..81289b9
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/queue.py
@@ -0,0 +1,322 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Zookeeper based queue implementations.
+
+:Maintainer: None
+:Status: Unknown
+
+"""
+
+import uuid
+from kazoo.exceptions import NoNodeError, NodeExistsError
+from kazoo.retry import ForceRetryError
+from kazoo.protocol.states import EventType
+
+
+class BaseQueue(object):
+    """A common base class for queue implementations."""
+
+    def __init__(self, client, path):
+        """
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The queue path to use in ZooKeeper.
+        """
+        self.client = client
+        self.path = path
+        self._entries_path = path
+        self.structure_paths = (self.path, )
+        self.ensured_path = False
+
+    def _check_put_arguments(self, value, priority=100):
+        if not isinstance(value, bytes):
+            raise TypeError("value must be a byte string")
+        if not isinstance(priority, int):
+            raise TypeError("priority must be an int")
+        elif priority < 0 or priority > 999:
+            raise ValueError("priority must be between 0 and 999")
+
+    def _ensure_paths(self):
+        if not self.ensured_path:
+            # make sure our parent / internal structure nodes exists
+            for path in self.structure_paths:
+                self.client.ensure_path(path)
+            self.ensured_path = True
+
+    def __len__(self):
+        self._ensure_paths()
+        _, stat = self.client.retry(self.client.get, self._entries_path)
+        return stat.children_count
+
+
+class Queue(BaseQueue):
+    """A distributed queue with optional priority support.
+
+    This queue does not offer reliable consumption. An entry is removed
+    from the queue prior to being processed. So if an error occurs, the
+    consumer has to re-queue the item or it will be lost.
+
+    """
+
+    prefix = "entry-"
+
+    def __init__(self, client, path):
+        """
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The queue path to use in ZooKeeper.
+        """
+        super(Queue, self).__init__(client, path)
+        self._children = []
+
+    def __len__(self):
+        """Return queue size."""
+        return super(Queue, self).__len__()
+
+    def get(self):
+        """
+        Get item data and remove an item from the queue.
+
+        :returns: Item data or None.
+        :rtype: bytes
+        """
+        self._ensure_paths()
+        return self.client.retry(self._inner_get)
+
+    def _inner_get(self):
+        if not self._children:
+            self._children = self.client.retry(self.client.get_children, self.path)
+            self._children = sorted(self._children)
+        if not self._children:
+            return None
+        name = self._children[0]
+        try:
+            data, stat = self.client.get(self.path + "/" + name)
+        except NoNodeError:  # pragma: nocover
+            # the first node has vanished in the meantime, try to
+            # get another one
+            raise ForceRetryError()
+        try:
+            self.client.delete(self.path + "/" + name)
+        except NoNodeError:  # pragma: nocover
+            # we were able to get the data but someone else has removed
+            # the node in the meantime. consider the item as processed
+            # by the other process
+            raise ForceRetryError()
+        self._children.pop(0)
+        return data
+
+    def put(self, value, priority=100):
+        """Put an item into the queue.
+
+        :param value: Byte string to put into the queue.
+        :param priority:
+            An optional priority as an integer with at most 3 digits.
+            Lower values signify higher priority.
+        """
+        self._check_put_arguments(value, priority)
+        self._ensure_paths()
+        path = '{path}/{prefix}{priority:03d}-'.format(
+            path=self.path, prefix=self.prefix, priority=priority)
+        self.client.create(path, value, sequence=True)
+
+
+class LockingQueue(BaseQueue):
+    """A distributed queue with priority and locking support.
+
+    Upon retrieving an entry from the queue, the entry gets locked with an
+    ephemeral node (instead of deleted). If an error occurs, this lock gets
+    released so that others could retake the entry. This adds a little penalty
+    as compared to :class:`Queue` implementation.
+
+    The user should call the :meth:`LockingQueue.get` method first to lock and
+    retrieve the next entry. When finished processing the entry, a user should
+    call the :meth:`LockingQueue.consume` method that will remove the entry
+    from the queue.
+
+    This queue will not track connection status with ZooKeeper. If a node locks
+    an element, then loses connection with ZooKeeper and later reconnects, the
+    lock will probably be removed by Zookeeper in the meantime, but a node
+    would still think that it holds a lock. The user should check the
+    connection status with Zookeeper or call :meth:`LockingQueue.holds_lock`
+    method that will check if a node still holds the lock.
+
+    .. note::
+        :class:`LockingQueue` requires ZooKeeper 3.4 or above, since it is
+        using transactions.
+    """
+    lock = "/taken"
+    entries = "/entries"
+    entry = "entry"
+
+    def __init__(self, client, path):
+        """
+        :param client: A :class:`~kazoo.client.KazooClient` instance.
+        :param path: The queue path to use in ZooKeeper.
+        """
+        super(LockingQueue, self).__init__(client, path)
+        self.id = uuid.uuid4().hex.encode()
+        self.processing_element = None
+        self._lock_path = self.path + self.lock
+        self._entries_path = self.path + self.entries
+        self.structure_paths = (self._lock_path, self._entries_path)
+
+    def __len__(self):
+        """Returns the current length of the queue.
+
+        :returns: queue size (includes locked entries count).
+        """
+        return super(LockingQueue, self).__len__()
+
+    def put(self, value, priority=100):
+        """Put an entry into the queue.
+
+        :param value: Byte string to put into the queue.
+        :param priority:
+            An optional priority as an integer with at most 3 digits.
+            Lower values signify higher priority.
+
+        """
+        self._check_put_arguments(value, priority)
+        self._ensure_paths()
+
+        self.client.create(
+            "{path}/{prefix}-{priority:03d}-".format(
+                path=self._entries_path,
+                prefix=self.entry,
+                priority=priority),
+            value, sequence=True)
+
+    def put_all(self, values, priority=100):
+        """Put several entries into the queue. The action only succeeds
+        if all entries were put into the queue.
+
+        :param values: A list of values to put into the queue.
+        :param priority:
+            An optional priority as an integer with at most 3 digits.
+            Lower values signify higher priority.
+
+        """
+        if not isinstance(values, list):
+            raise TypeError("values must be a list of byte strings")
+        if not isinstance(priority, int):
+            raise TypeError("priority must be an int")
+        elif priority < 0 or priority > 999:
+            raise ValueError("priority must be between 0 and 999")
+        self._ensure_paths()
+
+        with self.client.transaction() as transaction:
+            for value in values:
+                if not isinstance(value, bytes):
+                    raise TypeError("value must be a byte string")
+                transaction.create(
+                    "{path}/{prefix}-{priority:03d}-".format(
+                        path=self._entries_path,
+                        prefix=self.entry,
+                        priority=priority),
+                    value, sequence=True)
+
+    def get(self, timeout=None):
+        """Locks and gets an entry from the queue. If a previously got entry
+        was not consumed, this method will return that entry.
+
+        :param timeout:
+            Maximum waiting time in seconds. If None then it will wait
+            until an entry appears in the queue.
+        :returns: A locked entry value or None if the timeout was reached.
+        :rtype: bytes
+        """
+        self._ensure_paths()
+        if not self.processing_element is None:
+            return self.processing_element[1]
+        else:
+            return self._inner_get(timeout)
+
+    def holds_lock(self):
+        """Checks if a node still holds the lock.
+
+        :returns: True if a node still holds the lock, False otherwise.
+        :rtype: bool
+        """
+        if self.processing_element is None:
+            return False
+        lock_id, _ = self.processing_element
+        lock_path = "{path}/{id}".format(path=self._lock_path, id=lock_id)
+        self.client.sync(lock_path)
+        value, stat = self.client.retry(self.client.get, lock_path)
+        return value == self.id
+
+    def consume(self):
+        """Removes a currently processing entry from the queue.
+
+        :returns: True if element was removed successfully, False otherwise.
+        :rtype: bool
+        """
+        if not self.processing_element is None and self.holds_lock:
+            id_, value = self.processing_element
+            with self.client.transaction() as transaction:
+                transaction.delete("{path}/{id}".format(
+                    path=self._entries_path,
+                    id=id_))
+                transaction.delete("{path}/{id}".format(
+                    path=self._lock_path,
+                    id=id_))
+            self.processing_element = None
+            return True
+        else:
+            return False
+
+    def _inner_get(self, timeout):
+        flag = self.client.handler.event_object()
+        lock = self.client.handler.lock_object()
+        canceled = False
+        value = []
+
+        def check_for_updates(event):
+            if not event is None and event.type != EventType.CHILD:
+                return
+            with lock:
+                if canceled or flag.isSet():
+                    return
+                values = self.client.retry(self.client.get_children,
+                    self._entries_path,
+                    check_for_updates)
+                taken = self.client.retry(self.client.get_children,
+                    self._lock_path,
+                    check_for_updates)
+                available = self._filter_locked(values, taken)
+                if len(available) > 0:
+                    ret = self._take(available[0])
+                    if not ret is None:
+                        # By this time, no one took the task
+                        value.append(ret)
+                        flag.set()
+
+        check_for_updates(None)
+        retVal = None
+        flag.wait(timeout)
+        with lock:
+            canceled = True
+            if len(value) > 0:
+                # We successfully locked an entry
+                self.processing_element = value[0]
+                retVal = value[0][1]
+        return retVal
+
+    def _filter_locked(self, values, taken):
+        taken = set(taken)
+        available = sorted(values)
+        return (available if len(taken) == 0 else
+            [x for x in available if x not in taken])
+
+    def _take(self, id_):
+        try:
+            self.client.create(
+                "{path}/{id}".format(
+                    path=self._lock_path,
+                    id=id_),
+                self.id,
+                ephemeral=True)
+            value, stat = self.client.retry(self.client.get,
+                "{path}/{id}".format(path=self._entries_path, id=id_))
+        except (NoNodeError, NodeExistsError):
+            # Item is already consumed or locked
+            return None
+        return (id_, value)
diff --git a/slider-agent/src/main/python/kazoo/recipe/watchers.py b/slider-agent/src/main/python/kazoo/recipe/watchers.py
new file mode 100644
index 0000000..f9667f8
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/recipe/watchers.py
@@ -0,0 +1,420 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Higher level child and data watching API's.
+
+:Maintainer: Ben Bangert <ben@groovie.org>
+:Status: Production
+
+.. note::
+
+    :ref:`DataWatch` and :ref:`ChildrenWatch` may only handle a single
+    function, attempts to associate a single instance with multiple functions
+    will result in an exception being thrown.
+
+"""
+import logging
+import time
+import warnings
+from functools import partial, wraps
+
+from kazoo.retry import KazooRetry
+from kazoo.exceptions import (
+    ConnectionClosedError,
+    NoNodeError,
+    KazooException
+)
+from kazoo.protocol.states import KazooState
+
+log = logging.getLogger(__name__)
+
+
+_STOP_WATCHING = object()
+
+
+def _ignore_closed(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except ConnectionClosedError:
+            pass
+    return wrapper
+
+
+class DataWatch(object):
+    """Watches a node for data updates and calls the specified
+    function each time it changes
+
+    The function will also be called the very first time it's
+    registered to get the data.
+
+    Returning `False` from the registered function will disable future
+    data change calls. If the client connection is closed (using the
+    close command), the DataWatch will no longer get updates.
+
+    If the function supplied takes three arguments, then the third one
+    will be a :class:`~kazoo.protocol.states.WatchedEvent`. It will
+    only be set if the change to the data occurs as a result of the
+    server notifying the watch that there has been a change. Events
+    like reconnection or the first call will not include an event.
+
+    If the node does not exist, then the function will be called with
+    ``None`` for all values.
+
+    .. tip::
+
+        Because :class:`DataWatch` can watch nodes that don't exist, it
+        can be used alternatively as a higher-level Exists watcher that
+        survives reconnections and session loss.
+
+    Example with client:
+
+    .. code-block:: python
+
+        @client.DataWatch('/path/to/watch')
+        def my_func(data, stat):
+            print("Data is %s" % data)
+            print("Version is %s" % stat.version)
+
+        # Above function is called immediately and prints
+
+        # Or if you want the event object
+        @client.DataWatch('/path/to/watch')
+        def my_func(data, stat, event):
+            print("Data is %s" % data)
+            print("Version is %s" % stat.version)
+            print("Event is %s" % event)
+
+    .. versionchanged:: 1.2
+
+        DataWatch now ignores additional arguments that were previously
+        passed to it and warns that they are no longer respected.
+
+    """
+    def __init__(self, client, path, func=None, *args, **kwargs):
+        """Create a data watcher for a path
+
+        :param client: A zookeeper client.
+        :type client: :class:`~kazoo.client.KazooClient`
+        :param path: The path to watch for data changes on.
+        :type path: str
+        :param func: Function to call initially and every time the
+                     node changes. `func` will be called with a
+                     tuple, the value of the node and a
+                     :class:`~kazoo.client.ZnodeStat` instance.
+        :type func: callable
+
+        """
+        self._client = client
+        self._path = path
+        self._func = func
+        self._stopped = False
+        self._run_lock = client.handler.lock_object()
+        self._version = None
+        self._retry = KazooRetry(max_tries=None,
+            sleep_func=client.handler.sleep_func)
+        self._include_event = None
+        self._ever_called = False
+        self._used = False
+
+        if args or kwargs:
+            warnings.warn('Passing additional arguments to DataWatch is'
+                          ' deprecated. ignore_missing_node is now assumed '
+                          ' to be True by default, and the event will be '
+                          ' sent if the function can handle receiving it',
+                          DeprecationWarning, stacklevel=2)
+
+        # Register our session listener if we're going to resume
+        # across session losses
+        if func is not None:
+            self._used = True
+            self._client.add_listener(self._session_watcher)
+            self._get_data()
+
+    def __call__(self, func):
+        """Callable version for use as a decorator
+
+        :param func: Function to call initially and every time the
+                     data changes. `func` will be called with a
+                     tuple, the value of the node and a
+                     :class:`~kazoo.client.ZnodeStat` instance.
+        :type func: callable
+
+        """
+        if self._used:
+            raise KazooException(
+                "A function has already been associated with this "
+                "DataWatch instance.")
+
+        self._func = func
+
+        self._used = True
+        self._client.add_listener(self._session_watcher)
+        self._get_data()
+        return func
+
+    def _log_func_exception(self, data, stat, event=None):
+        try:
+            # For backwards compatibility, don't send event to the
+            # callback unless the send_event is set in constructor
+            if not self._ever_called:
+                self._ever_called = True
+            try:
+                result = self._func(data, stat, event)
+            except TypeError:
+                result = self._func(data, stat)
+            if result is False:
+                self._stopped = True
+                self._client.remove_listener(self._session_watcher)
+        except Exception as exc:
+            log.exception(exc)
+            raise
+
+    @_ignore_closed
+    def _get_data(self, event=None):
+        # Ensure this runs one at a time, possible because the session
+        # watcher may trigger a run
+        with self._run_lock:
+            if self._stopped:
+                return
+
+            initial_version = self._version
+
+            try:
+                data, stat = self._retry(self._client.get,
+                                         self._path, self._watcher)
+            except NoNodeError:
+                data = None
+
+                # This will set 'stat' to None if the node does not yet
+                # exist.
+                stat = self._retry(self._client.exists, self._path,
+                                   self._watcher)
+                if stat:
+                    self._client.handler.spawn(self._get_data)
+                    return
+
+            # No node data, clear out version
+            if stat is None:
+                self._version = None
+            else:
+                self._version = stat.mzxid
+
+            # Call our function if it's the first time ever, or if the
+            # version has changed
+            if initial_version != self._version or not self._ever_called:
+                self._log_func_exception(data, stat, event)
+
+    def _watcher(self, event):
+        self._get_data(event=event)
+
+    def _set_watch(self, state):
+        with self._run_lock:
+            self._watch_established = state
+
+    def _session_watcher(self, state):
+        if state == KazooState.CONNECTED:
+            self._client.handler.spawn(self._get_data)
+
+
+class ChildrenWatch(object):
+    """Watches a node for children updates and calls the specified
+    function each time it changes
+
+    The function will also be called the very first time it's
+    registered to get children.
+
+    Returning `False` from the registered function will disable future
+    children change calls. If the client connection is closed (using
+    the close command), the ChildrenWatch will no longer get updates.
+
+    if send_event=True in __init__, then the function will always be
+    called with second parameter, ``event``. Upon initial call or when
+    recovering a lost session the ``event`` is always ``None``.
+    Otherwise it's a :class:`~kazoo.protocol.states.WatchedEvent`
+    instance.
+
+    Example with client:
+
+    .. code-block:: python
+
+        @client.ChildrenWatch('/path/to/watch')
+        def my_func(children):
+            print "Children are %s" % children
+
+        # Above function is called immediately and prints children
+
+    """
+    def __init__(self, client, path, func=None,
+                 allow_session_lost=True, send_event=False):
+        """Create a children watcher for a path
+
+        :param client: A zookeeper client.
+        :type client: :class:`~kazoo.client.KazooClient`
+        :param path: The path to watch for children on.
+        :type path: str
+        :param func: Function to call initially and every time the
+                     children change. `func` will be called with a
+                     single argument, the list of children.
+        :type func: callable
+        :param allow_session_lost: Whether the watch should be
+                                   re-registered if the zookeeper
+                                   session is lost.
+        :type allow_session_lost: bool
+        :type send_event: bool
+        :param send_event: Whether the function should be passed the
+                           event sent by ZooKeeper or None upon
+                           initialization (see class documentation)
+
+        The path must already exist for the children watcher to
+        run.
+
+        """
+        self._client = client
+        self._path = path
+        self._func = func
+        self._send_event = send_event
+        self._stopped = False
+        self._watch_established = False
+        self._allow_session_lost = allow_session_lost
+        self._run_lock = client.handler.lock_object()
+        self._prior_children = None
+        self._used = False
+
+        # Register our session listener if we're going to resume
+        # across session losses
+        if func is not None:
+            self._used = True
+            if allow_session_lost:
+                self._client.add_listener(self._session_watcher)
+            self._get_children()
+
+    def __call__(self, func):
+        """Callable version for use as a decorator
+
+        :param func: Function to call initially and every time the
+                     children change. `func` will be called with a
+                     single argument, the list of children.
+        :type func: callable
+
+        """
+        if self._used:
+            raise KazooException(
+                "A function has already been associated with this "
+                "ChildrenWatch instance.")
+
+        self._func = func
+
+        self._used = True
+        if self._allow_session_lost:
+            self._client.add_listener(self._session_watcher)
+        self._get_children()
+        return func
+
+    @_ignore_closed
+    def _get_children(self, event=None):
+        with self._run_lock:  # Ensure this runs one at a time
+            if self._stopped:
+                return
+
+            children = self._client.retry(self._client.get_children,
+                                          self._path, self._watcher)
+            if not self._watch_established:
+                self._watch_established = True
+
+                if self._prior_children is not None and \
+                   self._prior_children == children:
+                    return
+
+            self._prior_children = children
+
+            try:
+                if self._send_event:
+                    result = self._func(children, event)
+                else:
+                    result = self._func(children)
+                if result is False:
+                    self._stopped = True
+            except Exception as exc:
+                log.exception(exc)
+                raise
+
+    def _watcher(self, event):
+        self._get_children(event)
+
+    def _session_watcher(self, state):
+        if state in (KazooState.LOST, KazooState.SUSPENDED):
+            self._watch_established = False
+        elif state == KazooState.CONNECTED and \
+             not self._watch_established and not self._stopped:
+            self._client.handler.spawn(self._get_children)
+
+
+class PatientChildrenWatch(object):
+    """Patient Children Watch that returns values after the children
+    of a node don't change for a period of time
+
+    A separate watcher for the children of a node, that ignores
+    changes within a boundary time and sets the result only when the
+    boundary time has elapsed with no children changes.
+
+    Example::
+
+        watcher = PatientChildrenWatch(client, '/some/path',
+                                       time_boundary=5)
+        async_object = watcher.start()
+
+        # Blocks until the children have not changed for time boundary
+        # (5 in this case) seconds, returns children list and an
+        # async_result that will be set if the children change in the
+        # future
+        children, child_async = async_object.get()
+
+    .. note::
+
+        This Watch is different from :class:`DataWatch` and
+        :class:`ChildrenWatch` as it only returns once, does not take
+        a function that is called, and provides an
+        :class:`~kazoo.interfaces.IAsyncResult` object that can be
+        checked to see if the children have changed later.
+
+    """
+    def __init__(self, client, path, time_boundary=30):
+        self.client = client
+        self.path = path
+        self.children = []
+        self.time_boundary = time_boundary
+        self.children_changed = client.handler.event_object()
+
+    def start(self):
+        """Begin the watching process asynchronously
+
+        :returns: An :class:`~kazoo.interfaces.IAsyncResult` instance
+                  that will be set when no change has occurred to the
+                  children for time boundary seconds.
+
+        """
+        self.asy = asy = self.client.handler.async_result()
+        self.client.handler.spawn(self._inner_start)
+        return asy
+
+    def _inner_start(self):
+        try:
+            while True:
+                async_result = self.client.handler.async_result()
+                self.children = self.client.retry(
+                    self.client.get_children, self.path,
+                    partial(self._children_watcher, async_result))
+                self.client.handler.sleep_func(self.time_boundary)
+
+                if self.children_changed.is_set():
+                    self.children_changed.clear()
+                else:
+                    break
+
+            self.asy.set((self.children, async_result))
+        except Exception as exc:
+            self.asy.set_exception(exc)
+
+    def _children_watcher(self, async, event):
+        self.children_changed.set()
+        async.set(time.time())
diff --git a/slider-agent/src/main/python/kazoo/retry.py b/slider-agent/src/main/python/kazoo/retry.py
new file mode 100644
index 0000000..229b99d
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/retry.py
@@ -0,0 +1,151 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import logging
+import random
+import time
+
+from kazoo.exceptions import (
+    ConnectionClosedError,
+    ConnectionLoss,
+    KazooException,
+    OperationTimeoutError,
+    SessionExpiredError,
+)
+
+log = logging.getLogger(__name__)
+
+
+class ForceRetryError(Exception):
+    """Raised when some recipe logic wants to force a retry."""
+
+
+class RetryFailedError(KazooException):
+    """Raised when retrying an operation ultimately failed, after
+    retrying the maximum number of attempts.
+    """
+
+
+class InterruptedError(RetryFailedError):
+    """Raised when the retry is forcibly interrupted by the interrupt
+    function"""
+
+
+class KazooRetry(object):
+    """Helper for retrying a method in the face of retry-able
+    exceptions"""
+    RETRY_EXCEPTIONS = (
+        ConnectionLoss,
+        OperationTimeoutError,
+        ForceRetryError
+    )
+
+    EXPIRED_EXCEPTIONS = (
+        SessionExpiredError,
+    )
+
+    def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=0.8,
+                 max_delay=3600, ignore_expire=True, sleep_func=time.sleep,
+                 deadline=None, interrupt=None):
+        """Create a :class:`KazooRetry` instance for retrying function
+        calls
+
+        :param max_tries: How many times to retry the command. -1 means
+                          infinite tries.
+        :param delay: Initial delay between retry attempts.
+        :param backoff: Backoff multiplier between retry attempts.
+                        Defaults to 2 for exponential backoff.
+        :param max_jitter: Additional max jitter period to wait between
+                           retry attempts to avoid slamming the server.
+        :param max_delay: Maximum delay in seconds, regardless of other
+                          backoff settings. Defaults to one hour.
+        :param ignore_expire:
+            Whether a session expiration should be ignored and treated
+            as a retry-able command.
+        :param interrupt:
+            Function that will be called with no args that may return
+            True if the retry should be ceased immediately. This will
+            be called no more than every 0.1 seconds during a wait
+            between retries.
+
+        """
+        self.max_tries = max_tries
+        self.delay = delay
+        self.backoff = backoff
+        self.max_jitter = int(max_jitter * 100)
+        self.max_delay = float(max_delay)
+        self._attempts = 0
+        self._cur_delay = delay
+        self.deadline = deadline
+        self._cur_stoptime = None
+        self.sleep_func = sleep_func
+        self.retry_exceptions = self.RETRY_EXCEPTIONS
+        self.interrupt = interrupt
+        if ignore_expire:
+            self.retry_exceptions += self.EXPIRED_EXCEPTIONS
+
+    def reset(self):
+        """Reset the attempt counter"""
+        self._attempts = 0
+        self._cur_delay = self.delay
+        self._cur_stoptime = None
+
+    def copy(self):
+        """Return a clone of this retry manager"""
+        obj = KazooRetry(max_tries=self.max_tries,
+                         delay=self.delay,
+                         backoff=self.backoff,
+                         max_jitter=self.max_jitter / 100.0,
+                         max_delay=self.max_delay,
+                         sleep_func=self.sleep_func,
+                         deadline=self.deadline,
+                         interrupt=self.interrupt)
+        obj.retry_exceptions = self.retry_exceptions
+        return obj
+
+    def __call__(self, func, *args, **kwargs):
+        """Call a function with arguments until it completes without
+        throwing a Kazoo exception
+
+        :param func: Function to call
+        :param args: Positional arguments to call the function with
+        :param kwargs: Keyword arguments to call the function with
+
+        The function will be called until it doesn't throw one of the
+        retryable exceptions (ConnectionLoss, OperationTimeout, or
+        ForceRetryError), and optionally retrying on session
+        expiration.
+
+        """
+        self.reset()
+
+        while True:
+            try:
+                if self.deadline is not None and self._cur_stoptime is None:
+                    self._cur_stoptime = time.time() + self.deadline
+                return func(*args, **kwargs)
+            except ConnectionClosedError:
+                raise
+            except self.retry_exceptions:
+                # Note: max_tries == -1 means infinite tries.
+                if self._attempts == self.max_tries:
+                    raise RetryFailedError("Too many retry attempts")
+                self._attempts += 1
+                sleeptime = self._cur_delay + (random.randint(0, self.max_jitter) / 100.0)
+
+                if self._cur_stoptime is not None and time.time() + sleeptime >= self._cur_stoptime:
+                    raise RetryFailedError("Exceeded retry deadline")
+
+                if self.interrupt:
+                    while sleeptime > 0:
+                        # Break the time period down and sleep for no longer than
+                        # 0.1 before calling the interrupt
+                        if sleeptime < 0.1:
+                            self.sleep_func(sleeptime)
+                            sleeptime -= sleeptime
+                        else:
+                            self.sleep_func(0.1)
+                            sleeptime -= 0.1
+                        if self.interrupt():
+                            raise InterruptedError()
+                else:
+                    self.sleep_func(sleeptime)
+                self._cur_delay = min(self._cur_delay * self.backoff, self.max_delay)
diff --git a/slider-agent/src/main/python/kazoo/security.py b/slider-agent/src/main/python/kazoo/security.py
new file mode 100644
index 0000000..4532489
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/security.py
@@ -0,0 +1,139 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo Security"""
+from base64 import b64encode
+from collections import namedtuple
+import hashlib
+
+
+# Represents a Zookeeper ID and ACL object
+Id = namedtuple('Id', 'scheme id')
+
+
+class ACL(namedtuple('ACL', 'perms id')):
+    """An ACL for a Zookeeper Node
+
+    An ACL object is created by using an :class:`Id` object along with
+    a :class:`Permissions` setting. For convenience,
+    :meth:`make_digest_acl` should be used to create an ACL object with
+    the desired scheme, id, and permissions.
+
+    """
+    @property
+    def acl_list(self):
+        perms = []
+        if self.perms & Permissions.ALL == Permissions.ALL:
+            perms.append('ALL')
+            return perms
+        if self.perms & Permissions.READ == Permissions.READ:
+            perms.append('READ')
+        if self.perms & Permissions.WRITE == Permissions.WRITE:
+            perms.append('WRITE')
+        if self.perms & Permissions.CREATE == Permissions.CREATE:
+            perms.append('CREATE')
+        if self.perms & Permissions.DELETE == Permissions.DELETE:
+            perms.append('DELETE')
+        if self.perms & Permissions.ADMIN == Permissions.ADMIN:
+            perms.append('ADMIN')
+        return perms
+
+    def __repr__(self):
+        return 'ACL(perms=%r, acl_list=%s, id=%r)' % (
+            self.perms, self.acl_list, self.id)
+
+
+class Permissions(object):
+    READ = 1
+    WRITE = 2
+    CREATE = 4
+    DELETE = 8
+    ADMIN = 16
+    ALL = 31
+
+
+# Shortcuts for common Ids
+ANYONE_ID_UNSAFE = Id('world', 'anyone')
+AUTH_IDS = Id('auth', '')
+
+# Shortcuts for common ACLs
+OPEN_ACL_UNSAFE = [ACL(Permissions.ALL, ANYONE_ID_UNSAFE)]
+CREATOR_ALL_ACL = [ACL(Permissions.ALL, AUTH_IDS)]
+READ_ACL_UNSAFE = [ACL(Permissions.READ, ANYONE_ID_UNSAFE)]
+
+
+def make_digest_acl_credential(username, password):
+    """Create a SHA1 digest credential"""
+    credential = username.encode('utf-8') + b":" + password.encode('utf-8')
+    cred_hash = b64encode(hashlib.sha1(credential).digest()).strip()
+    return username + ":" + cred_hash.decode('utf-8')
+
+
+def make_acl(scheme, credential, read=False, write=False,
+             create=False, delete=False, admin=False, all=False):
+    """Given a scheme and credential, return an :class:`ACL` object
+    appropriate for use with Kazoo.
+
+    :param scheme: The scheme to use. I.e. `digest`.
+    :param credential:
+        A colon separated username, password. The password should be
+        hashed with the `scheme` specified. The
+        :meth:`make_digest_acl_credential` method will create and
+        return a credential appropriate for use with the `digest`
+        scheme.
+    :param write: Write permission.
+    :type write: bool
+    :param create: Create permission.
+    :type create: bool
+    :param delete: Delete permission.
+    :type delete: bool
+    :param admin: Admin permission.
+    :type admin: bool
+    :param all: All permissions.
+    :type all: bool
+
+    :rtype: :class:`ACL`
+
+    """
+    if all:
+        permissions = Permissions.ALL
+    else:
+        permissions = 0
+        if read:
+            permissions |= Permissions.READ
+        if write:
+            permissions |= Permissions.WRITE
+        if create:
+            permissions |= Permissions.CREATE
+        if delete:
+            permissions |= Permissions.DELETE
+        if admin:
+            permissions |= Permissions.ADMIN
+    return ACL(permissions, Id(scheme, credential))
+
+
+def make_digest_acl(username, password, read=False, write=False,
+                    create=False, delete=False, admin=False, all=False):
+    """Create a digest ACL for Zookeeper with the given permissions
+
+    This method combines :meth:`make_digest_acl_credential` and
+    :meth:`make_acl` to create an :class:`ACL` object appropriate for
+    use with Kazoo's ACL methods.
+
+    :param username: Username to use for the ACL.
+    :param password: A plain-text password to hash.
+    :param write: Write permission.
+    :type write: bool
+    :param create: Create permission.
+    :type create: bool
+    :param delete: Delete permission.
+    :type delete: bool
+    :param admin: Admin permission.
+    :type admin: bool
+    :param all: All permissions.
+    :type all: bool
+
+    :rtype: :class:`ACL`
+
+    """
+    cred = make_digest_acl_credential(username, password)
+    return make_acl("digest", cred, read=read, write=write, create=create,
+        delete=delete, admin=admin, all=all)
diff --git a/slider-agent/src/main/python/kazoo/testing/__init__.py b/slider-agent/src/main/python/kazoo/testing/__init__.py
new file mode 100644
index 0000000..660546b
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/__init__.py
@@ -0,0 +1,6 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from kazoo.testing.harness import KazooTestCase
+from kazoo.testing.harness import KazooTestHarness
+
+
+__all__ = ('KazooTestHarness', 'KazooTestCase', )
diff --git a/slider-agent/src/main/python/kazoo/testing/common.py b/slider-agent/src/main/python/kazoo/testing/common.py
new file mode 100644
index 0000000..b497a8e
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/common.py
@@ -0,0 +1,284 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+#
+#  Copyright (C) 2010-2011, 2011 Canonical Ltd. All Rights Reserved
+#
+#  This file was originally taken from txzookeeper and modified later.
+#
+#  Authors:
+#   Kapil Thangavelu and the Kazoo team
+#
+#  txzookeeper is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU Lesser General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  txzookeeper is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public License
+#  along with txzookeeper.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import code
+import os
+import os.path
+import shutil
+import signal
+import subprocess
+import tempfile
+import traceback
+
+from itertools import chain
+from collections import namedtuple
+from glob import glob
+
+
+def debug(sig, frame):
+    """Interrupt running process, and provide a python prompt for
+    interactive debugging."""
+    d = {'_frame': frame}         # Allow access to frame object.
+    d.update(frame.f_globals)  # Unless shadowed by global
+    d.update(frame.f_locals)
+
+    i = code.InteractiveConsole(d)
+    message = "Signal recieved : entering python shell.\nTraceback:\n"
+    message += ''.join(traceback.format_stack(frame))
+    i.interact(message)
+
+
+def listen():
+    if os.name != 'nt':  # SIGUSR1 is not supported on Windows
+        signal.signal(signal.SIGUSR1, debug)  # Register handler
+listen()
+
+
+def to_java_compatible_path(path):
+    if os.name == 'nt':
+        path = path.replace('\\', '/')
+    return path
+
+ServerInfo = namedtuple(
+    "ServerInfo", "server_id client_port election_port leader_port")
+
+
+class ManagedZooKeeper(object):
+    """Class to manage the running of a ZooKeeper instance for testing.
+
+    Note: no attempt is made to probe whether the ZooKeeper instance
+    is actually available, or that the selected port is free. In the
+    future, we may want to do that, especially when run in a
+    Hudson/Buildbot context, to ensure more test robustness."""
+
+    def __init__(self, software_path, server_info, peers=(), classpath=None):
+        """Define the ZooKeeper test instance.
+
+        @param software_path: The path to the ZooKeeper install to run
+        @param server_info: ServerInfo with the ports for the managed ZK instance
+        """
+        self.install_path = software_path
+        self._classpath = classpath
+        self.server_info = server_info
+        self.host = "127.0.0.1"
+        self.peers = peers
+        self.working_path = tempfile.mkdtemp()
+        self._running = False
+
+    def run(self):
+        """Run the ZooKeeper instance under a temporary directory.
+
+        Writes ZK log messages to zookeeper.log in the current directory.
+        """
+        if self.running:
+            return
+        config_path = os.path.join(self.working_path, "zoo.cfg")
+        log_path = os.path.join(self.working_path, "log")
+        log4j_path = os.path.join(self.working_path, "log4j.properties")
+        data_path = os.path.join(self.working_path, "data")
+
+        # various setup steps
+        if not os.path.exists(self.working_path):
+            os.mkdir(self.working_path)
+        if not os.path.exists(log_path):
+            os.mkdir(log_path)
+        if not os.path.exists(data_path):
+            os.mkdir(data_path)
+
+        with open(config_path, "w") as config:
+            config.write("""
+tickTime=2000
+dataDir=%s
+clientPort=%s
+maxClientCnxns=0
+""" % (to_java_compatible_path(data_path), self.server_info.client_port))
+
+        # setup a replicated setup if peers are specified
+        if self.peers:
+            servers_cfg = []
+            for p in chain((self.server_info,), self.peers):
+                servers_cfg.append("server.%s=localhost:%s:%s" % (
+                    p.server_id, p.leader_port, p.election_port))
+
+            with open(config_path, "a") as config:
+                config.write("""
+initLimit=4
+syncLimit=2
+%s
+""" % ("\n".join(servers_cfg)))
+
+        # Write server ids into datadir
+        with open(os.path.join(data_path, "myid"), "w") as myid_file:
+            myid_file.write(str(self.server_info.server_id))
+
+        with open(log4j_path, "w") as log4j:
+            log4j.write("""
+# DEFAULT: console appender only
+log4j.rootLogger=INFO, ROLLINGFILE
+log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
+log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
+log4j.appender.ROLLINGFILE.Threshold=DEBUG
+log4j.appender.ROLLINGFILE.File=""" + to_java_compatible_path(
+                self.working_path + os.sep + "zookeeper.log\n"))
+
+        self.process = subprocess.Popen(
+            args=["java",
+                  "-cp", self.classpath,
+                  "-Dreadonlymode.enabled=true",
+                  "-Dzookeeper.log.dir=%s" % log_path,
+                  "-Dzookeeper.root.logger=INFO,CONSOLE",
+                  "-Dlog4j.configuration=file:%s" % log4j_path,
+                  # "-Dlog4j.debug",
+                  "org.apache.zookeeper.server.quorum.QuorumPeerMain",
+                  config_path])
+        self._running = True
+
+    @property
+    def classpath(self):
+        """Get the classpath necessary to run ZooKeeper."""
+
+        if self._classpath:
+            return self._classpath
+
+        # Two possibilities, as seen in zkEnv.sh:
+        # Check for a release - top-level zookeeper-*.jar?
+        jars = glob((os.path.join(
+            self.install_path, 'zookeeper-*.jar')))
+        if jars:
+            # Release build (`ant package`)
+            jars.extend(glob(os.path.join(
+                self.install_path,
+                "lib/*.jar")))
+            # support for different file locations on Debian/Ubuntu
+            jars.extend(glob(os.path.join(
+                self.install_path,
+                "log4j-*.jar")))
+            jars.extend(glob(os.path.join(
+                self.install_path,
+                "slf4j-api-*.jar")))
+            jars.extend(glob(os.path.join(
+                self.install_path,
+                "slf4j-log4j-*.jar")))
+        else:
+            # Development build (plain `ant`)
+            jars = glob((os.path.join(
+                self.install_path, 'build/zookeeper-*.jar')))
+            jars.extend(glob(os.path.join(
+                self.install_path,
+                "build/lib/*.jar")))
+
+        return os.pathsep.join(jars)
+
+    @property
+    def address(self):
+        """Get the address of the ZooKeeper instance."""
+        return "%s:%s" % (self.host, self.client_port)
+
+    @property
+    def running(self):
+        return self._running
+
+    @property
+    def client_port(self):
+        return self.server_info.client_port
+
+    def reset(self):
+        """Stop the zookeeper instance, cleaning out its on disk-data."""
+        self.stop()
+        shutil.rmtree(os.path.join(self.working_path, "data"))
+        os.mkdir(os.path.join(self.working_path, "data"))
+        with open(os.path.join(self.working_path, "data", "myid"), "w") as fh:
+            fh.write(str(self.server_info.server_id))
+
+    def stop(self):
+        """Stop the Zookeeper instance, retaining on disk state."""
+        if not self.running:
+            return
+        self.process.terminate()
+        self.process.wait()
+        self._running = False
+
+    def destroy(self):
+        """Stop the ZooKeeper instance and destroy its on disk-state"""
+        # called by at exit handler, reimport to avoid cleanup race.
+        import shutil
+        self.stop()
+
+        shutil.rmtree(self.working_path)
+
+
+class ZookeeperCluster(object):
+
+    def __init__(self, install_path=None, classpath=None, size=3, port_offset=20000):
+        self._install_path = install_path
+        self._classpath = classpath
+        self._servers = []
+
+        # Calculate ports and peer group
+        port = port_offset
+        peers = []
+
+        for i in range(size):
+            info = ServerInfo(i + 1, port, port + 1, port + 2)
+            peers.append(info)
+            port += 10
+
+        # Instantiate Managed ZK Servers
+        for i in range(size):
+            server_peers = list(peers)
+            server_info = server_peers.pop(i)
+            self._servers.append(
+                ManagedZooKeeper(
+                    self._install_path, server_info, server_peers, classpath=self._classpath))
+
+    def __getitem__(self, k):
+        return self._servers[k]
+
+    def __iter__(self):
+        return iter(self._servers)
+
+    def start(self):
+        # Zookeeper client expresses a preference for either lower ports or
+        # lexicographical ordering of hosts, to ensure that all servers have a
+        # chance to startup, start them in reverse order.
+        for server in reversed(list(self)):
+            server.run()
+        # Giving the servers a moment to start, decreases the overall time
+        # required for a client to successfully connect (2s vs. 4s without
+        # the sleep).
+        import time
+        time.sleep(2)
+
+    def stop(self):
+        for server in self:
+            server.stop()
+        self._servers = []
+
+    def terminate(self):
+        for server in self:
+            server.destroy()
+
+    def reset(self):
+        for server in self:
+            server.reset()
diff --git a/slider-agent/src/main/python/kazoo/testing/harness.py b/slider-agent/src/main/python/kazoo/testing/harness.py
new file mode 100644
index 0000000..93cc744
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/testing/harness.py
@@ -0,0 +1,181 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+"""Kazoo testing harnesses"""
+import atexit
+import logging
+import os
+import uuid
+import threading
+import unittest
+
+from kazoo.client import KazooClient
+from kazoo.exceptions import NotEmptyError
+from kazoo.protocol.states import (
+    KazooState
+)
+from kazoo.testing.common import ZookeeperCluster
+from kazoo.protocol.connection import _SESSION_EXPIRED
+
+log = logging.getLogger(__name__)
+
+CLUSTER = None
+
+
+def get_global_cluster():
+    global CLUSTER
+    if CLUSTER is None:
+        ZK_HOME = os.environ.get("ZOOKEEPER_PATH")
+        ZK_CLASSPATH = os.environ.get("ZOOKEEPER_CLASSPATH")
+        ZK_PORT_OFFSET = int(os.environ.get("ZOOKEEPER_PORT_OFFSET", 20000))
+
+        assert ZK_HOME or ZK_CLASSPATH, (
+            "either ZOOKEEPER_PATH or ZOOKEEPER_CLASSPATH environment variable "
+            "must be defined.\n"
+            "For deb package installations this is /usr/share/java")
+
+        CLUSTER = ZookeeperCluster(
+            install_path=ZK_HOME,
+            classpath=ZK_CLASSPATH,
+            port_offset=ZK_PORT_OFFSET,
+        )
+        atexit.register(lambda cluster: cluster.terminate(), CLUSTER)
+    return CLUSTER
+
+
+class KazooTestHarness(unittest.TestCase):
+    """Harness for testing code that uses Kazoo
+
+    This object can be used directly or as a mixin. It supports starting
+    and stopping a complete ZooKeeper cluster locally and provides an
+    API for simulating errors and expiring sessions.
+
+    Example::
+
+        class MyTestCase(KazooTestHarness):
+            def setUp(self):
+                self.setup_zookeeper()
+
+                # additional test setup
+
+            def tearDown(self):
+                self.teardown_zookeeper()
+
+            def test_something(self):
+                something_that_needs_a_kazoo_client(self.client)
+
+            def test_something_else(self):
+                something_that_needs_zk_servers(self.servers)
+
+    """
+
+    def __init__(self, *args, **kw):
+        super(KazooTestHarness, self).__init__(*args, **kw)
+        self.client = None
+        self._clients = []
+
+    @property
+    def cluster(self):
+        return get_global_cluster()
+
+    @property
+    def servers(self):
+        return ",".join([s.address for s in self.cluster])
+
+    def _get_nonchroot_client(self):
+        return KazooClient(self.servers)
+
+    def _get_client(self, **kwargs):
+        c = KazooClient(self.hosts, **kwargs)
+        try:
+            self._clients.append(c)
+        except AttributeError:
+            self._client = [c]
+        return c
+
+    def expire_session(self, client_id=None):
+        """Force ZK to expire a client session
+
+        :param client_id: id of client to expire. If unspecified, the id of
+                          self.client will be used.
+
+        """
+        client_id = client_id or self.client.client_id
+
+        lost = threading.Event()
+        safe = threading.Event()
+
+        def watch_loss(state):
+            if state == KazooState.LOST:
+                lost.set()
+            if lost.is_set() and state == KazooState.CONNECTED:
+                safe.set()
+                return True
+
+        self.client.add_listener(watch_loss)
+
+        self.client._call(_SESSION_EXPIRED, None)
+
+        lost.wait(5)
+        if not lost.isSet():
+            raise Exception("Failed to get notified of session loss")
+
+        # Wait for the reconnect now
+        safe.wait(15)
+        if not safe.isSet():
+            raise Exception("Failed to see client reconnect")
+        self.client.retry(self.client.get_async, '/')
+
+    def setup_zookeeper(self, **client_options):
+        """Create a ZK cluster and chrooted :class:`KazooClient`
+
+        The cluster will only be created on the first invocation and won't be
+        fully torn down until exit.
+        """
+        if not self.cluster[0].running:
+            self.cluster.start()
+        namespace = "/kazootests" + uuid.uuid4().hex
+        self.hosts = self.servers + namespace
+
+        if 'timeout' not in client_options:
+            client_options['timeout'] = 0.8
+        self.client = self._get_client(**client_options)
+        self.client.start()
+        self.client.ensure_path("/")
+
+    def teardown_zookeeper(self):
+        """Clean up any ZNodes created during the test
+        """
+        if not self.cluster[0].running:
+            self.cluster.start()
+
+        tries = 0
+        if self.client and self.client.connected:
+            while tries < 3:
+                try:
+                    self.client.retry(self.client.delete, '/', recursive=True)
+                    break
+                except NotEmptyError:
+                    pass
+                tries += 1
+            self.client.stop()
+            self.client.close()
+            del self.client
+        else:
+            client = self._get_client()
+            client.start()
+            client.retry(client.delete, '/', recursive=True)
+            client.stop()
+            client.close()
+            del client
+
+        for client in self._clients:
+            client.stop()
+            del client
+        self._clients = None
+
+
+class KazooTestCase(KazooTestHarness):
+    def setUp(self):
+        self.setup_zookeeper()
+
+    def tearDown(self):
+        self.teardown_zookeeper()
diff --git a/slider-agent/src/main/python/kazoo/tests/__init__.py b/slider-agent/src/main/python/kazoo/tests/__init__.py
new file mode 100644
index 0000000..901253b
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/__init__.py
@@ -0,0 +1 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
diff --git a/slider-agent/src/main/python/kazoo/tests/test_barrier.py b/slider-agent/src/main/python/kazoo/tests/test_barrier.py
new file mode 100644
index 0000000..461664f
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_barrier.py
@@ -0,0 +1,158 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import threading
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooBarrierTests(KazooTestCase):
+    def test_barrier_not_exist(self):
+        b = self.client.Barrier("/some/path")
+        eq_(b.wait(), True)
+
+    def test_barrier_exists(self):
+        b = self.client.Barrier("/some/path")
+        b.create()
+        eq_(b.wait(0), False)
+        b.remove()
+        eq_(b.wait(), True)
+
+    def test_remove_nonexistent_barrier(self):
+        b = self.client.Barrier("/some/path")
+        eq_(b.remove(), False)
+
+
+class KazooDoubleBarrierTests(KazooTestCase):
+
+    def test_basic_barrier(self):
+        b = self.client.DoubleBarrier("/some/path", 1)
+        eq_(b.participating, False)
+        b.enter()
+        eq_(b.participating, True)
+        b.leave()
+        eq_(b.participating, False)
+
+    def test_two_barrier(self):
+        av = threading.Event()
+        ev = threading.Event()
+        bv = threading.Event()
+        release_all = threading.Event()
+        b1 = self.client.DoubleBarrier("/some/path", 2)
+        b2 = self.client.DoubleBarrier("/some/path", 2)
+
+        def make_barrier_one():
+            b1.enter()
+            ev.set()
+            release_all.wait()
+            b1.leave()
+            ev.set()
+
+        def make_barrier_two():
+            bv.wait()
+            b2.enter()
+            av.set()
+            release_all.wait()
+            b2.leave()
+            av.set()
+
+        # Spin up both of them
+        t1 = threading.Thread(target=make_barrier_one)
+        t1.start()
+        t2 = threading.Thread(target=make_barrier_two)
+        t2.start()
+
+        eq_(b1.participating, False)
+        eq_(b2.participating, False)
+
+        bv.set()
+        av.wait()
+        ev.wait()
+        eq_(b1.participating, True)
+        eq_(b2.participating, True)
+
+        av.clear()
+        ev.clear()
+
+        release_all.set()
+        av.wait()
+        ev.wait()
+        eq_(b1.participating, False)
+        eq_(b2.participating, False)
+        t1.join()
+        t2.join()
+
+    def test_three_barrier(self):
+        av = threading.Event()
+        ev = threading.Event()
+        bv = threading.Event()
+        release_all = threading.Event()
+        b1 = self.client.DoubleBarrier("/some/path", 3)
+        b2 = self.client.DoubleBarrier("/some/path", 3)
+        b3 = self.client.DoubleBarrier("/some/path", 3)
+
+        def make_barrier_one():
+            b1.enter()
+            ev.set()
+            release_all.wait()
+            b1.leave()
+            ev.set()
+
+        def make_barrier_two():
+            bv.wait()
+            b2.enter()
+            av.set()
+            release_all.wait()
+            b2.leave()
+            av.set()
+
+        # Spin up both of them
+        t1 = threading.Thread(target=make_barrier_one)
+        t1.start()
+        t2 = threading.Thread(target=make_barrier_two)
+        t2.start()
+
+        eq_(b1.participating, False)
+        eq_(b2.participating, False)
+
+        bv.set()
+        eq_(b1.participating, False)
+        eq_(b2.participating, False)
+        b3.enter()
+        ev.wait()
+        av.wait()
+
+        eq_(b1.participating, True)
+        eq_(b2.participating, True)
+        eq_(b3.participating, True)
+
+        av.clear()
+        ev.clear()
+
+        release_all.set()
+        b3.leave()
+        av.wait()
+        ev.wait()
+        eq_(b1.participating, False)
+        eq_(b2.participating, False)
+        eq_(b3.participating, False)
+        t1.join()
+        t2.join()
+
+    def test_barrier_existing_parent_node(self):
+        b = self.client.DoubleBarrier('/some/path', 1)
+        self.assertFalse(b.participating)
+        self.client.create('/some', ephemeral=True)
+        # the barrier cannot create children under an ephemeral node
+        b.enter()
+        self.assertFalse(b.participating)
+
+    def test_barrier_existing_node(self):
+        b = self.client.DoubleBarrier('/some', 1)
+        self.assertFalse(b.participating)
+        self.client.ensure_path(b.path)
+        self.client.create(b.create_path, ephemeral=True)
+        # the barrier will re-use an existing node
+        b.enter()
+        self.assertTrue(b.participating)
+        b.leave()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_build.py b/slider-agent/src/main/python/kazoo/tests/test_build.py
new file mode 100644
index 0000000..0f75d7c
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_build.py
@@ -0,0 +1,30 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import os
+
+from nose import SkipTest
+
+from kazoo.testing import KazooTestCase
+
+
+class TestBuildEnvironment(KazooTestCase):
+
+    def setUp(self):
+        KazooTestCase.setUp(self)
+        if not os.environ.get('TRAVIS'):
+            raise SkipTest('Only run build config tests on Travis.')
+
+    def test_gevent_version(self):
+        try:
+            import gevent
+        except ImportError:
+            raise SkipTest('gevent not available.')
+        env_version = os.environ.get('GEVENT_VERSION')
+        if env_version:
+            self.assertEqual(env_version, gevent.__version__)
+
+    def test_zookeeper_version(self):
+        server_version = self.client.server_version()
+        server_version = '.'.join([str(i) for i in server_version])
+        env_version = os.environ.get('ZOOKEEPER_VERSION')
+        if env_version:
+            self.assertEqual(env_version, server_version)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_client.py b/slider-agent/src/main/python/kazoo/tests/test_client.py
new file mode 100644
index 0000000..eb19ef5
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_client.py
@@ -0,0 +1,1099 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import socket
+import sys
+import threading
+import time
+import uuid
+import unittest
+
+from mock import patch
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.testing import KazooTestCase
+from kazoo.exceptions import (
+    AuthFailedError,
+    BadArgumentsError,
+    ConfigurationError,
+    ConnectionClosedError,
+    ConnectionLoss,
+    InvalidACLError,
+    NoAuthError,
+    NoNodeError,
+    NodeExistsError,
+    SessionExpiredError,
+)
+from kazoo.protocol.connection import _CONNECTION_DROP
+from kazoo.protocol.states import KeeperState, KazooState
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+if sys.version_info > (3, ):  # pragma: nocover
+    def u(s):
+        return s
+else:  # pragma: nocover
+    def u(s):
+        return unicode(s, "unicode_escape")
+
+
+class TestClientTransitions(KazooTestCase):
+    def test_connection_and_disconnection(self):
+        states = []
+        rc = threading.Event()
+
+        @self.client.add_listener
+        def listener(state):
+            states.append(state)
+            if state == KazooState.CONNECTED:
+                rc.set()
+
+        self.client.stop()
+        eq_(states, [KazooState.LOST])
+        states.pop()
+
+        self.client.start()
+        rc.wait(2)
+        eq_(states, [KazooState.CONNECTED])
+        rc.clear()
+        states.pop()
+        self.expire_session()
+        rc.wait(2)
+
+        req_states = [KazooState.LOST, KazooState.CONNECTED]
+        eq_(states, req_states)
+
+
+class TestClientConstructor(unittest.TestCase):
+
+    def _makeOne(self, *args, **kw):
+        from kazoo.client import KazooClient
+        return KazooClient(*args, **kw)
+
+    def test_invalid_handler(self):
+        from kazoo.handlers.threading import SequentialThreadingHandler
+        self.assertRaises(ConfigurationError,
+            self._makeOne, handler=SequentialThreadingHandler)
+
+    def test_chroot(self):
+        self.assertEqual(self._makeOne(hosts='127.0.0.1:2181/').chroot, '')
+        self.assertEqual(self._makeOne(hosts='127.0.0.1:2181/a').chroot, '/a')
+        self.assertEqual(self._makeOne(hosts='127.0.0.1/a').chroot, '/a')
+        self.assertEqual(self._makeOne(hosts='127.0.0.1/a/b').chroot, '/a/b')
+        self.assertEqual(self._makeOne(
+            hosts='127.0.0.1:2181,127.0.0.1:2182/a/b').chroot, '/a/b')
+
+    def test_connection_timeout(self):
+        from kazoo.handlers.threading import TimeoutError
+        client = self._makeOne(hosts='127.0.0.1:9')
+        self.assertTrue(client.handler.timeout_exception is TimeoutError)
+        self.assertRaises(TimeoutError, client.start, 0.1)
+
+    def test_ordered_host_selection(self):
+        client = self._makeOne(hosts='127.0.0.1:9,127.0.0.2:9/a',
+            randomize_hosts=False)
+        hosts = [h for h in client.hosts]
+        eq_(hosts, [('127.0.0.1', 9), ('127.0.0.2', 9)])
+
+    def test_invalid_hostname(self):
+        client = self._makeOne(hosts='nosuchhost/a')
+        timeout = client.handler.timeout_exception
+        self.assertRaises(timeout, client.start, 0.1)
+
+    def test_retry_options_dict(self):
+        from kazoo.retry import KazooRetry
+        client = self._makeOne(command_retry=dict(max_tries=99),
+                               connection_retry=dict(delay=99))
+        self.assertTrue(type(client._conn_retry) is KazooRetry)
+        self.assertTrue(type(client._retry) is KazooRetry)
+        eq_(client._retry.max_tries, 99)
+        eq_(client._conn_retry.delay, 99)
+
+
+class TestAuthentication(KazooTestCase):
+
+    def _makeAuth(self, *args, **kwargs):
+        from kazoo.security import make_digest_acl
+        return make_digest_acl(*args, **kwargs)
+
+    def test_auth(self):
+        username = uuid.uuid4().hex
+        password = uuid.uuid4().hex
+
+        digest_auth = "%s:%s" % (username, password)
+        acl = self._makeAuth(username, password, all=True)
+
+        client = self._get_client()
+        client.start()
+        client.add_auth("digest", digest_auth)
+        client.default_acl = (acl,)
+
+        try:
+            client.create("/1")
+            client.create("/1/2")
+            client.ensure_path("/1/2/3")
+
+            eve = self._get_client()
+            eve.start()
+
+            self.assertRaises(NoAuthError, eve.get, "/1/2")
+
+            # try again with the wrong auth token
+            eve.add_auth("digest", "badbad:bad")
+
+            self.assertRaises(NoAuthError, eve.get, "/1/2")
+        finally:
+            # Ensure we remove the ACL protected nodes
+            client.delete("/1", recursive=True)
+            eve.stop()
+            eve.close()
+
+    def test_connect_auth(self):
+        username = uuid.uuid4().hex
+        password = uuid.uuid4().hex
+
+        digest_auth = "%s:%s" % (username, password)
+        acl = self._makeAuth(username, password, all=True)
+
+        client = self._get_client(auth_data=[('digest', digest_auth)])
+        client.start()
+        try:
+            client.create('/1', acl=(acl,))
+            # give ZK a chance to copy data to other node
+            time.sleep(0.1)
+            self.assertRaises(NoAuthError, self.client.get, "/1")
+        finally:
+            client.delete('/1')
+            client.stop()
+            client.close()
+
+    def test_unicode_auth(self):
+        username = u("xe4/\hm")
+        password = u("/\xe4hm")
+        digest_auth = "%s:%s" % (username, password)
+        acl = self._makeAuth(username, password, all=True)
+
+        client = self._get_client()
+        client.start()
+        client.add_auth("digest", digest_auth)
+        client.default_acl = (acl,)
+
+        try:
+            client.create("/1")
+            client.ensure_path("/1/2/3")
+
+            eve = self._get_client()
+            eve.start()
+
+            self.assertRaises(NoAuthError, eve.get, "/1/2")
+
+            # try again with the wrong auth token
+            eve.add_auth("digest", "badbad:bad")
+
+            self.assertRaises(NoAuthError, eve.get, "/1/2")
+        finally:
+            # Ensure we remove the ACL protected nodes
+            client.delete("/1", recursive=True)
+            eve.stop()
+            eve.close()
+
+    def test_invalid_auth(self):
+        client = self._get_client()
+        client.start()
+        self.assertRaises(TypeError, client.add_auth,
+                          'digest', ('user', 'pass'))
+        self.assertRaises(TypeError, client.add_auth,
+                          None, ('user', 'pass'))
+
+    def test_async_auth(self):
+        client = self._get_client()
+        client.start()
+        username = uuid.uuid4().hex
+        password = uuid.uuid4().hex
+        digest_auth = "%s:%s" % (username, password)
+        result = client.add_auth_async("digest", digest_auth)
+        self.assertTrue(result.get())
+
+    def test_async_auth_failure(self):
+        client = self._get_client()
+        client.start()
+        username = uuid.uuid4().hex
+        password = uuid.uuid4().hex
+        digest_auth = "%s:%s" % (username, password)
+
+        self.assertRaises(AuthFailedError, client.add_auth,
+                          "unknown-scheme", digest_auth)
+
+    def test_add_auth_on_reconnect(self):
+        client = self._get_client()
+        client.start()
+        client.add_auth("digest", "jsmith:jsmith")
+        client._connection._socket.shutdown(socket.SHUT_RDWR)
+        while not client.connected:
+            time.sleep(0.1)
+        self.assertTrue(("digest", "jsmith:jsmith") in client.auth_data)
+
+
+class TestConnection(KazooTestCase):
+
+    def test_chroot_warning(self):
+        k = self._get_nonchroot_client()
+        k.chroot = 'abba'
+        try:
+            with patch('warnings.warn') as mock_func:
+                k.start()
+                assert mock_func.called
+        finally:
+            k.stop()
+
+    def test_session_expire(self):
+        from kazoo.protocol.states import KazooState
+
+        cv = threading.Event()
+
+        def watch_events(event):
+            if event == KazooState.LOST:
+                cv.set()
+
+        self.client.add_listener(watch_events)
+        self.expire_session()
+        cv.wait(3)
+        assert cv.is_set()
+
+    def test_bad_session_expire(self):
+        from kazoo.protocol.states import KazooState
+
+        cv = threading.Event()
+        ab = threading.Event()
+
+        def watch_events(event):
+            if event == KazooState.LOST:
+                ab.set()
+                raise Exception("oops")
+                cv.set()
+
+        self.client.add_listener(watch_events)
+        self.expire_session()
+        ab.wait(0.5)
+        assert ab.is_set()
+        cv.wait(0.5)
+        assert not cv.is_set()
+
+    def test_state_listener(self):
+        from kazoo.protocol.states import KazooState
+        states = []
+        condition = threading.Condition()
+
+        def listener(state):
+            with condition:
+                states.append(state)
+                condition.notify_all()
+
+        self.client.stop()
+        eq_(self.client.state, KazooState.LOST)
+        self.client.add_listener(listener)
+        self.client.start(5)
+
+        with condition:
+            if not states:
+                condition.wait(5)
+
+        eq_(len(states), 1)
+        eq_(states[0], KazooState.CONNECTED)
+
+    def test_invalid_listener(self):
+        self.assertRaises(ConfigurationError, self.client.add_listener, 15)
+
+    def test_listener_only_called_on_real_state_change(self):
+        from kazoo.protocol.states import KazooState
+        self.assertTrue(self.client.state, KazooState.CONNECTED)
+        called = [False]
+        condition = threading.Event()
+
+        def listener(state):
+            called[0] = True
+            condition.set()
+
+        self.client.add_listener(listener)
+        self.client._make_state_change(KazooState.CONNECTED)
+        condition.wait(3)
+        self.assertFalse(called[0])
+
+    def test_no_connection(self):
+        client = self.client
+        client.stop()
+        self.assertFalse(client.connected)
+        self.assertTrue(client.client_id is None)
+        self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+    def test_close_connecting_connection(self):
+        client = self.client
+        client.stop()
+        ev = threading.Event()
+
+        def close_on_connecting(state):
+            if state in (KazooState.CONNECTED, KazooState.LOST):
+                ev.set()
+
+        client.add_listener(close_on_connecting)
+        client.start()
+
+        # Wait until we connect
+        ev.wait(5)
+        ev.clear()
+        self.client._call(_CONNECTION_DROP, client.handler.async_result())
+
+        client.stop()
+
+        # ...and then wait until the connection is lost
+        ev.wait(5)
+
+        self.assertRaises(ConnectionClosedError,
+                          self.client.create, '/foobar')
+
+    def test_double_start(self):
+        self.assertTrue(self.client.connected)
+        self.client.start()
+        self.assertTrue(self.client.connected)
+
+    def test_double_stop(self):
+        self.client.stop()
+        self.assertFalse(self.client.connected)
+        self.client.stop()
+        self.assertFalse(self.client.connected)
+
+    def test_restart(self):
+        self.assertTrue(self.client.connected)
+        self.client.restart()
+        self.assertTrue(self.client.connected)
+
+    def test_closed(self):
+        client = self.client
+        client.stop()
+
+        write_pipe = client._connection._write_pipe
+
+        # close the connection to free the pipe
+        client.close()
+        eq_(client._connection._write_pipe, None)
+
+        # sneak in and patch client to simulate race between a thread
+        # calling stop(); close() and one running a command
+        oldstate = client._state
+        client._state = KeeperState.CONNECTED
+        client._connection._write_pipe = write_pipe
+        try:
+            # simulate call made after write pipe is closed
+            self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+            # simualte call made after write pipe is set to None
+            client._connection._write_pipe = None
+            self.assertRaises(ConnectionClosedError, client.exists, '/')
+
+        finally:
+            # reset for teardown
+            client._state = oldstate
+            client._connection._write_pipe = None
+
+
+class TestClient(KazooTestCase):
+    def _getKazooState(self):
+        from kazoo.protocol.states import KazooState
+        return KazooState
+
+    def test_client_id(self):
+        client_id = self.client.client_id
+        self.assertEqual(type(client_id), tuple)
+        # make sure password is of correct length
+        self.assertEqual(len(client_id[1]), 16)
+
+    def test_connected(self):
+        client = self.client
+        self.assertTrue(client.connected)
+
+    def test_create(self):
+        client = self.client
+        path = client.create("/1")
+        eq_(path, "/1")
+        self.assertTrue(client.exists("/1"))
+
+    def test_create_on_broken_connection(self):
+        client = self.client
+        client.start()
+
+        client._state = KeeperState.EXPIRED_SESSION
+        self.assertRaises(SessionExpiredError, client.create,
+                          '/closedpath', b'bar')
+
+        client._state = KeeperState.AUTH_FAILED
+        self.assertRaises(AuthFailedError, client.create,
+                          '/closedpath', b'bar')
+
+        client._state = KeeperState.CONNECTING
+        self.assertRaises(SessionExpiredError, client.create,
+                          '/closedpath', b'bar')
+        client.stop()
+        client.close()
+
+        self.assertRaises(ConnectionClosedError, client.create,
+                          '/closedpath', b'bar')
+
+    def test_create_null_data(self):
+        client = self.client
+        client.create("/nulldata", None)
+        value, _ = client.get("/nulldata")
+        self.assertEqual(value, None)
+
+    def test_create_empty_string(self):
+        client = self.client
+        client.create("/empty", b"")
+        value, _ = client.get("/empty")
+        eq_(value, b"")
+
+    def test_create_unicode_path(self):
+        client = self.client
+        path = client.create(u("/ascii"))
+        eq_(path, u("/ascii"))
+        path = client.create(u("/\xe4hm"))
+        eq_(path, u("/\xe4hm"))
+
+    def test_create_async_returns_unchrooted_path(self):
+        client = self.client
+        path = client.create_async('/1').get()
+        eq_(path, "/1")
+
+    def test_create_invalid_path(self):
+        client = self.client
+        self.assertRaises(TypeError, client.create, ('a', ))
+        self.assertRaises(ValueError, client.create, ".")
+        self.assertRaises(ValueError, client.create, "/a/../b")
+        self.assertRaises(BadArgumentsError, client.create, "/b\x00")
+        self.assertRaises(BadArgumentsError, client.create, "/b\x1e")
+
+    def test_create_invalid_arguments(self):
+        from kazoo.security import OPEN_ACL_UNSAFE
+        single_acl = OPEN_ACL_UNSAFE[0]
+        client = self.client
+        self.assertRaises(TypeError, client.create, 'a', acl='all')
+        self.assertRaises(TypeError, client.create, 'a', acl=single_acl)
+        self.assertRaises(TypeError, client.create, 'a', value=['a'])
+        self.assertRaises(TypeError, client.create, 'a', ephemeral='yes')
+        self.assertRaises(TypeError, client.create, 'a', sequence='yes')
+        self.assertRaises(TypeError, client.create, 'a', makepath='yes')
+
+    def test_create_value(self):
+        client = self.client
+        client.create("/1", b"bytes")
+        data, stat = client.get("/1")
+        eq_(data, b"bytes")
+
+    def test_create_unicode_value(self):
+        client = self.client
+        self.assertRaises(TypeError, client.create, "/1", u("\xe4hm"))
+
+    def test_create_large_value(self):
+        client = self.client
+        kb_512 = b"a" * (512 * 1024)
+        client.create("/1", kb_512)
+        self.assertTrue(client.exists("/1"))
+        mb_2 = b"a" * (2 * 1024 * 1024)
+        self.assertRaises(ConnectionLoss, client.create, "/2", mb_2)
+
+    def test_create_acl_duplicate(self):
+        from kazoo.security import OPEN_ACL_UNSAFE
+        single_acl = OPEN_ACL_UNSAFE[0]
+        client = self.client
+        client.create("/1", acl=[single_acl, single_acl])
+        acls, stat = client.get_acls("/1")
+        # ZK >3.4 removes duplicate ACL entries
+        if TRAVIS_ZK_VERSION:
+            version = TRAVIS_ZK_VERSION
+        else:
+            version = client.server_version()
+        self.assertEqual(len(acls), 1 if version > (3, 4) else 2)
+
+    def test_create_acl_empty_list(self):
+        from kazoo.security import OPEN_ACL_UNSAFE
+        client = self.client
+        client.create("/1", acl=[])
+        acls, stat = client.get_acls("/1")
+        self.assertEqual(acls, OPEN_ACL_UNSAFE)
+
+    def test_version_no_connection(self):
+        @raises(ConnectionLoss)
+        def testit():
+            self.client.server_version()
+        self.client.stop()
+        testit()
+
+    def test_create_ephemeral(self):
+        client = self.client
+        client.create("/1", b"ephemeral", ephemeral=True)
+        data, stat = client.get("/1")
+        eq_(data, b"ephemeral")
+        eq_(stat.ephemeralOwner, client.client_id[0])
+
+    def test_create_no_ephemeral(self):
+        client = self.client
+        client.create("/1", b"val1")
+        data, stat = client.get("/1")
+        self.assertFalse(stat.ephemeralOwner)
+
+    def test_create_ephemeral_no_children(self):
+        from kazoo.exceptions import NoChildrenForEphemeralsError
+        client = self.client
+        client.create("/1", b"ephemeral", ephemeral=True)
+        self.assertRaises(NoChildrenForEphemeralsError,
+            client.create, "/1/2", b"val1")
+        self.assertRaises(NoChildrenForEphemeralsError,
+            client.create, "/1/2", b"val1", ephemeral=True)
+
+    def test_create_sequence(self):
+        client = self.client
+        client.create("/folder")
+        path = client.create("/folder/a", b"sequence", sequence=True)
+        eq_(path, "/folder/a0000000000")
+        path2 = client.create("/folder/a", b"sequence", sequence=True)
+        eq_(path2, "/folder/a0000000001")
+        path3 = client.create("/folder/", b"sequence", sequence=True)
+        eq_(path3, "/folder/0000000002")
+
+    def test_create_ephemeral_sequence(self):
+        basepath = "/" + uuid.uuid4().hex
+        realpath = self.client.create(basepath, b"sandwich", sequence=True,
+            ephemeral=True)
+        self.assertTrue(basepath != realpath and realpath.startswith(basepath))
+        data, stat = self.client.get(realpath)
+        eq_(data, b"sandwich")
+
+    def test_create_makepath(self):
+        self.client.create("/1/2", b"val1", makepath=True)
+        data, stat = self.client.get("/1/2")
+        eq_(data, b"val1")
+
+        self.client.create("/1/2/3/4/5", b"val2", makepath=True)
+        data, stat = self.client.get("/1/2/3/4/5")
+        eq_(data, b"val2")
+
+        self.assertRaises(NodeExistsError, self.client.create, "/1/2/3/4/5",
+            b"val2", makepath=True)
+
+    def test_create_makepath_incompatible_acls(self):
+        from kazoo.client import KazooClient
+        from kazoo.security import make_digest_acl_credential, CREATOR_ALL_ACL
+        credential = make_digest_acl_credential("username", "password")
+        alt_client = KazooClient(self.cluster[0].address + self.client.chroot,
+            max_retries=5, auth_data=[("digest", credential)])
+        alt_client.start()
+        alt_client.create("/1/2", b"val2", makepath=True, acl=CREATOR_ALL_ACL)
+
+        try:
+            self.assertRaises(NoAuthError, self.client.create, "/1/2/3/4/5",
+                b"val2", makepath=True)
+        finally:
+            alt_client.delete('/', recursive=True)
+            alt_client.stop()
+
+    def test_create_no_makepath(self):
+        self.assertRaises(NoNodeError, self.client.create, "/1/2", b"val1")
+        self.assertRaises(NoNodeError, self.client.create, "/1/2", b"val1",
+            makepath=False)
+
+        self.client.create("/1/2", b"val1", makepath=True)
+        self.assertRaises(NoNodeError, self.client.create, "/1/2/3/4", b"val1",
+            makepath=False)
+
+    def test_create_exists(self):
+        from kazoo.exceptions import NodeExistsError
+        client = self.client
+        path = client.create("/1")
+        self.assertRaises(NodeExistsError, client.create, path)
+
+    def test_create_get_set(self):
+        nodepath = "/" + uuid.uuid4().hex
+
+        self.client.create(nodepath, b"sandwich", ephemeral=True)
+
+        data, stat = self.client.get(nodepath)
+        eq_(data, b"sandwich")
+
+        newstat = self.client.set(nodepath, b"hats", stat.version)
+        self.assertTrue(newstat)
+        assert newstat.version > stat.version
+
+        # Some other checks of the ZnodeStat object we got
+        eq_(newstat.acl_version, stat.acl_version)
+        eq_(newstat.created, stat.ctime / 1000.0)
+        eq_(newstat.last_modified, newstat.mtime / 1000.0)
+        eq_(newstat.owner_session_id, stat.ephemeralOwner)
+        eq_(newstat.creation_transaction_id, stat.czxid)
+        eq_(newstat.last_modified_transaction_id, newstat.mzxid)
+        eq_(newstat.data_length, newstat.dataLength)
+        eq_(newstat.children_count, stat.numChildren)
+        eq_(newstat.children_version, stat.cversion)
+
+    def test_get_invalid_arguments(self):
+        client = self.client
+        self.assertRaises(TypeError, client.get, ('a', 'b'))
+        self.assertRaises(TypeError, client.get, 'a', watch=True)
+
+    def test_bad_argument(self):
+        client = self.client
+        client.ensure_path("/1")
+        self.assertRaises(TypeError, self.client.set, "/1", 1)
+
+    def test_ensure_path(self):
+        client = self.client
+        client.ensure_path("/1/2")
+        self.assertTrue(client.exists("/1/2"))
+
+        client.ensure_path("/1/2/3/4")
+        self.assertTrue(client.exists("/1/2/3/4"))
+
+    def test_sync(self):
+        client = self.client
+        self.assertTrue(client.sync('/'), '/')
+
+    def test_exists(self):
+        nodepath = "/" + uuid.uuid4().hex
+
+        exists = self.client.exists(nodepath)
+        eq_(exists, None)
+
+        self.client.create(nodepath, b"sandwich", ephemeral=True)
+        exists = self.client.exists(nodepath)
+        self.assertTrue(exists)
+        assert isinstance(exists.version, int)
+
+        multi_node_nonexistent = "/" + uuid.uuid4().hex + "/hats"
+        exists = self.client.exists(multi_node_nonexistent)
+        eq_(exists, None)
+
+    def test_exists_invalid_arguments(self):
+        client = self.client
+        self.assertRaises(TypeError, client.exists, ('a', 'b'))
+        self.assertRaises(TypeError, client.exists, 'a', watch=True)
+
+    def test_exists_watch(self):
+        nodepath = "/" + uuid.uuid4().hex
+        event = self.client.handler.event_object()
+
+        def w(watch_event):
+            eq_(watch_event.path, nodepath)
+            event.set()
+
+        exists = self.client.exists(nodepath, watch=w)
+        eq_(exists, None)
+
+        self.client.create(nodepath, ephemeral=True)
+
+        event.wait(1)
+        self.assertTrue(event.is_set())
+
+    def test_exists_watcher_exception(self):
+        nodepath = "/" + uuid.uuid4().hex
+        event = self.client.handler.event_object()
+
+        # if the watcher throws an exception, all we can really do is log it
+        def w(watch_event):
+            eq_(watch_event.path, nodepath)
+            event.set()
+
+            raise Exception("test exception in callback")
+
+        exists = self.client.exists(nodepath, watch=w)
+        eq_(exists, None)
+
+        self.client.create(nodepath, ephemeral=True)
+
+        event.wait(1)
+        self.assertTrue(event.is_set())
+
+    def test_create_delete(self):
+        nodepath = "/" + uuid.uuid4().hex
+
+        self.client.create(nodepath, b"zzz")
+
+        self.client.delete(nodepath)
+
+        exists = self.client.exists(nodepath)
+        eq_(exists, None)
+
+    def test_get_acls(self):
+        from kazoo.security import make_digest_acl
+        acl = make_digest_acl('user', 'pass', all=True)
+        client = self.client
+        try:
+            client.create('/a', acl=[acl])
+            self.assertTrue(acl in client.get_acls('/a')[0])
+        finally:
+            client.delete('/a')
+
+    def test_get_acls_invalid_arguments(self):
+        client = self.client
+        self.assertRaises(TypeError, client.get_acls, ('a', 'b'))
+
+    def test_set_acls(self):
+        from kazoo.security import make_digest_acl
+        acl = make_digest_acl('user', 'pass', all=True)
+        client = self.client
+        client.create('/a')
+        try:
+            client.set_acls('/a', [acl])
+            self.assertTrue(acl in client.get_acls('/a')[0])
+        finally:
+            client.delete('/a')
+
+    def test_set_acls_empty(self):
+        client = self.client
+        client.create('/a')
+        self.assertRaises(InvalidACLError, client.set_acls, '/a', [])
+
+    def test_set_acls_no_node(self):
+        from kazoo.security import OPEN_ACL_UNSAFE
+        client = self.client
+        self.assertRaises(NoNodeError, client.set_acls, '/a', OPEN_ACL_UNSAFE)
+
+    def test_set_acls_invalid_arguments(self):
+        from kazoo.security import OPEN_ACL_UNSAFE
+        single_acl = OPEN_ACL_UNSAFE[0]
+        client = self.client
+        self.assertRaises(TypeError, client.set_acls, ('a', 'b'), ())
+        self.assertRaises(TypeError, client.set_acls, 'a', single_acl)
+        self.assertRaises(TypeError, client.set_acls, 'a', 'all')
+        self.assertRaises(TypeError, client.set_acls, 'a', [single_acl], 'V1')
+
+    def test_set(self):
+        client = self.client
+        client.create('a', b'first')
+        stat = client.set('a', b'second')
+        data, stat2 = client.get('a')
+        self.assertEqual(data, b'second')
+        self.assertEqual(stat, stat2)
+
+    def test_set_null_data(self):
+        client = self.client
+        client.create("/nulldata", b"not none")
+        client.set("/nulldata", None)
+        value, _ = client.get("/nulldata")
+        self.assertEqual(value, None)
+
+    def test_set_empty_string(self):
+        client = self.client
+        client.create("/empty", b"not empty")
+        client.set("/empty", b"")
+        value, _ = client.get("/empty")
+        eq_(value, b"")
+
+    def test_set_invalid_arguments(self):
+        client = self.client
+        client.create('a', b'first')
+        self.assertRaises(TypeError, client.set, ('a', 'b'), b'value')
+        self.assertRaises(TypeError, client.set, 'a', ['v', 'w'])
+        self.assertRaises(TypeError, client.set, 'a', b'value', 'V1')
+
+    def test_delete(self):
+        client = self.client
+        client.ensure_path('/a/b')
+        self.assertTrue('b' in client.get_children('a'))
+        client.delete('/a/b')
+        self.assertFalse('b' in client.get_children('a'))
+
+    def test_delete_recursive(self):
+        client = self.client
+        client.ensure_path('/a/b/c')
+        client.ensure_path('/a/b/d')
+        client.delete('/a/b', recursive=True)
+        client.delete('/a/b/c', recursive=True)
+        self.assertFalse('b' in client.get_children('a'))
+
+    def test_delete_invalid_arguments(self):
+        client = self.client
+        client.ensure_path('/a/b')
+        self.assertRaises(TypeError, client.delete, '/a/b', recursive='all')
+        self.assertRaises(TypeError, client.delete, ('a', 'b'))
+        self.assertRaises(TypeError, client.delete, '/a/b', version='V1')
+
+    def test_get_children(self):
+        client = self.client
+        client.ensure_path('/a/b/c')
+        client.ensure_path('/a/b/d')
+        self.assertEqual(client.get_children('/a'), ['b'])
+        self.assertEqual(set(client.get_children('/a/b')), set(['c', 'd']))
+        self.assertEqual(client.get_children('/a/b/c'), [])
+
+    def test_get_children2(self):
+        client = self.client
+        client.ensure_path('/a/b')
+        children, stat = client.get_children('/a', include_data=True)
+        value, stat2 = client.get('/a')
+        self.assertEqual(children, ['b'])
+        self.assertEqual(stat2.version, stat.version)
+
+    def test_get_children2_many_nodes(self):
+        client = self.client
+        client.ensure_path('/a/b')
+        client.ensure_path('/a/c')
+        client.ensure_path('/a/d')
+        children, stat = client.get_children('/a', include_data=True)
+        value, stat2 = client.get('/a')
+        self.assertEqual(set(children), set(['b', 'c', 'd']))
+        self.assertEqual(stat2.version, stat.version)
+
+    def test_get_children_no_node(self):
+        client = self.client
+        self.assertRaises(NoNodeError, client.get_children, '/none')
+        self.assertRaises(NoNodeError, client.get_children,
+            '/none', include_data=True)
+
+    def test_get_children_invalid_path(self):
+        client = self.client
+        self.assertRaises(ValueError, client.get_children, '../a')
+
+    def test_get_children_invalid_arguments(self):
+        client = self.client
+        self.assertRaises(TypeError, client.get_children, ('a', 'b'))
+        self.assertRaises(TypeError, client.get_children, 'a', watch=True)
+        self.assertRaises(TypeError, client.get_children,
+            'a', include_data='yes')
+
+    def test_invalid_auth(self):
+        from kazoo.exceptions import AuthFailedError
+        from kazoo.protocol.states import KeeperState
+
+        client = self.client
+        client.stop()
+        client._state = KeeperState.AUTH_FAILED
+
+        @raises(AuthFailedError)
+        def testit():
+            client.get('/')
+        testit()
+
+    def test_client_state(self):
+        from kazoo.protocol.states import KeeperState
+        eq_(self.client.client_state, KeeperState.CONNECTED)
+
+    def test_update_host_list(self):
+        from kazoo.client import KazooClient
+        from kazoo.protocol.states import KeeperState
+        hosts = self.cluster[0].address
+        # create a client with only one server in its list
+        client = KazooClient(hosts=hosts)
+        client.start()
+
+        # try to change the chroot, not currently allowed
+        self.assertRaises(ConfigurationError,
+                          client.set_hosts, hosts + '/new_chroot')
+
+        # grow the cluster to 3
+        client.set_hosts(self.servers)
+
+        # shut down the first host
+        try:
+            self.cluster[0].stop()
+            time.sleep(5)
+            eq_(client.client_state, KeeperState.CONNECTED)
+        finally:
+            self.cluster[0].run()
+
+
+dummy_dict = {
+    'aversion': 1, 'ctime': 0, 'cversion': 1,
+    'czxid': 110, 'dataLength': 1, 'ephemeralOwner': 'ben',
+    'mtime': 1, 'mzxid': 1, 'numChildren': 0, 'pzxid': 1, 'version': 1
+}
+
+
+class TestClientTransactions(KazooTestCase):
+
+    def setUp(self):
+        KazooTestCase.setUp(self)
+        skip = False
+        if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+            skip = True
+        elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+            skip = False
+        else:
+            ver = self.client.server_version()
+            if ver[1] < 4:
+                skip = True
+        if skip:
+            raise SkipTest("Must use Zookeeper 3.4 or above")
+
+    def test_basic_create(self):
+        t = self.client.transaction()
+        t.create('/freddy')
+        t.create('/fred', ephemeral=True)
+        t.create('/smith', sequence=True)
+        results = t.commit()
+        eq_(results[0], '/freddy')
+        eq_(len(results), 3)
+        self.assertTrue(results[2].startswith('/smith0'))
+
+    def test_bad_creates(self):
+        args_list = [(True,), ('/smith', 0), ('/smith', b'', 'bleh'),
+                     ('/smith', b'', None, 'fred'),
+                     ('/smith', b'', None, True, 'fred')]
+
+        @raises(TypeError)
+        def testit(args):
+            t = self.client.transaction()
+            t.create(*args)
+
+        for args in args_list:
+            testit(args)
+
+    def test_default_acl(self):
+        from kazoo.security import make_digest_acl
+        username = uuid.uuid4().hex
+        password = uuid.uuid4().hex
+
+        digest_auth = "%s:%s" % (username, password)
+        acl = make_digest_acl(username, password, all=True)
+
+        self.client.add_auth("digest", digest_auth)
+        self.client.default_acl = (acl,)
+
+        t = self.client.transaction()
+        t.create('/freddy')
+        results = t.commit()
+        eq_(results[0], '/freddy')
+
+    def test_basic_delete(self):
+        self.client.create('/fred')
+        t = self.client.transaction()
+        t.delete('/fred')
+        results = t.commit()
+        eq_(results[0], True)
+
+    def test_bad_deletes(self):
+        args_list = [(True,), ('/smith', 'woops'), ]
+
+        @raises(TypeError)
+        def testit(args):
+            t = self.client.transaction()
+            t.delete(*args)
+
+        for args in args_list:
+            testit(args)
+
+    def test_set(self):
+        self.client.create('/fred', b'01')
+        t = self.client.transaction()
+        t.set_data('/fred', b'oops')
+        t.commit()
+        res = self.client.get('/fred')
+        eq_(res[0], b'oops')
+
+    def test_bad_sets(self):
+        args_list = [(42, 52), ('/smith', False), ('/smith', b'', 'oops')]
+
+        @raises(TypeError)
+        def testit(args):
+            t = self.client.transaction()
+            t.set_data(*args)
+
+        for args in args_list:
+            testit(args)
+
+    def test_check(self):
+        self.client.create('/fred')
+        version = self.client.get('/fred')[1].version
+        t = self.client.transaction()
+        t.check('/fred', version)
+        t.create('/blah')
+        results = t.commit()
+        eq_(results[0], True)
+        eq_(results[1], '/blah')
+
+    def test_bad_checks(self):
+        args_list = [(42, 52), ('/smith', 'oops')]
+
+        @raises(TypeError)
+        def testit(args):
+            t = self.client.transaction()
+            t.check(*args)
+
+        for args in args_list:
+            testit(args)
+
+    def test_bad_transaction(self):
+        from kazoo.exceptions import RolledBackError, NoNodeError
+        t = self.client.transaction()
+        t.create('/fred')
+        t.delete('/smith')
+        results = t.commit()
+        eq_(results[0].__class__, RolledBackError)
+        eq_(results[1].__class__, NoNodeError)
+
+    def test_bad_commit(self):
+        t = self.client.transaction()
+
+        @raises(ValueError)
+        def testit():
+            t.commit()
+
+        t.committed = True
+        testit()
+
+    def test_bad_context(self):
+        @raises(TypeError)
+        def testit():
+            with self.client.transaction() as t:
+                t.check(4232)
+        testit()
+
+    def test_context(self):
+        with self.client.transaction() as t:
+            t.create('/smith', b'32')
+        eq_(self.client.get('/smith')[0], b'32')
+
+
+class TestCallbacks(unittest.TestCase):
+    def test_session_callback_states(self):
+        from kazoo.protocol.states import KazooState, KeeperState
+        from kazoo.client import KazooClient
+
+        client = KazooClient()
+        client._handle = 1
+        client._live.set()
+
+        result = client._session_callback(KeeperState.CONNECTED)
+        eq_(result, None)
+
+        # Now with stopped
+        client._stopped.set()
+        result = client._session_callback(KeeperState.CONNECTED)
+        eq_(result, None)
+
+        # Test several state transitions
+        client._stopped.clear()
+        client.start_async = lambda: True
+        client._session_callback(KeeperState.CONNECTED)
+        eq_(client.state, KazooState.CONNECTED)
+
+        client._session_callback(KeeperState.AUTH_FAILED)
+        eq_(client.state, KazooState.LOST)
+
+        client._handle = 1
+        client._session_callback(-250)
+        eq_(client.state, KazooState.SUSPENDED)
+
+
+class TestNonChrootClient(KazooTestCase):
+
+    def test_create(self):
+        client = self._get_nonchroot_client()
+        self.assertEqual(client.chroot, '')
+        client.start()
+        node = uuid.uuid4().hex
+        path = client.create(node, ephemeral=True)
+        client.delete(path)
+        client.stop()
+
+    def test_unchroot(self):
+        client = self._get_nonchroot_client()
+        client.chroot = '/a'
+        self.assertEquals(client.unchroot('/a/b'), '/b')
+        self.assertEquals(client.unchroot('/b/c'), '/b/c')
diff --git a/slider-agent/src/main/python/kazoo/tests/test_connection.py b/slider-agent/src/main/python/kazoo/tests/test_connection.py
new file mode 100644
index 0000000..c764b03
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_connection.py
@@ -0,0 +1,320 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from collections import namedtuple
+import os
+import errno
+import threading
+import time
+import uuid
+import struct
+
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+import mock
+
+from kazoo.exceptions import ConnectionLoss
+from kazoo.protocol.serialization import (
+    Connect,
+    int_struct,
+    write_string,
+)
+from kazoo.protocol.states import KazooState
+from kazoo.protocol.connection import _CONNECTION_DROP
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+class Delete(namedtuple('Delete', 'path version')):
+    type = 2
+
+    def serialize(self):
+        b = bytearray()
+        b.extend(write_string(self.path))
+        b.extend(int_struct.pack(self.version))
+        return b
+
+    @classmethod
+    def deserialize(self, bytes, offset):
+        raise ValueError("oh my")
+
+
+class TestConnectionHandler(KazooTestCase):
+    def test_bad_deserialization(self):
+        async_object = self.client.handler.async_result()
+        self.client._queue.append((Delete(self.client.chroot, -1), async_object))
+        os.write(self.client._connection._write_pipe, b'\0')
+
+        @raises(ValueError)
+        def testit():
+            async_object.get()
+        testit()
+
+    def test_with_bad_sessionid(self):
+        ev = threading.Event()
+
+        def expired(state):
+            if state == KazooState.CONNECTED:
+                ev.set()
+
+        password = os.urandom(16)
+        client = self._get_client(client_id=(82838284824, password))
+        client.add_listener(expired)
+        client.start()
+        try:
+            ev.wait(15)
+            eq_(ev.is_set(), True)
+        finally:
+            client.stop()
+
+    def test_connection_read_timeout(self):
+        client = self.client
+        ev = threading.Event()
+        path = "/" + uuid.uuid4().hex
+        handler = client.handler
+        _select = handler.select
+        _socket = client._connection._socket
+
+        def delayed_select(*args, **kwargs):
+            result = _select(*args, **kwargs)
+            if len(args[0]) == 1 and _socket in args[0]:
+                # for any socket read, simulate a timeout
+                return [], [], []
+            return result
+
+        def back(state):
+            if state == KazooState.CONNECTED:
+                ev.set()
+
+        client.add_listener(back)
+        client.create(path, b"1")
+        try:
+            handler.select = delayed_select
+            self.assertRaises(ConnectionLoss, client.get, path)
+        finally:
+            handler.select = _select
+        # the client reconnects automatically
+        ev.wait(5)
+        eq_(ev.is_set(), True)
+        eq_(client.get(path)[0], b"1")
+
+    def test_connection_write_timeout(self):
+        client = self.client
+        ev = threading.Event()
+        path = "/" + uuid.uuid4().hex
+        handler = client.handler
+        _select = handler.select
+        _socket = client._connection._socket
+
+        def delayed_select(*args, **kwargs):
+            result = _select(*args, **kwargs)
+            if _socket in args[1]:
+                # for any socket write, simulate a timeout
+                return [], [], []
+            return result
+
+        def back(state):
+            if state == KazooState.CONNECTED:
+                ev.set()
+        client.add_listener(back)
+
+        try:
+            handler.select = delayed_select
+            self.assertRaises(ConnectionLoss, client.create, path)
+        finally:
+            handler.select = _select
+        # the client reconnects automatically
+        ev.wait(5)
+        eq_(ev.is_set(), True)
+        eq_(client.exists(path), None)
+
+    def test_connection_deserialize_fail(self):
+        client = self.client
+        ev = threading.Event()
+        path = "/" + uuid.uuid4().hex
+        handler = client.handler
+        _select = handler.select
+        _socket = client._connection._socket
+
+        def delayed_select(*args, **kwargs):
+            result = _select(*args, **kwargs)
+            if _socket in args[1]:
+                # for any socket write, simulate a timeout
+                return [], [], []
+            return result
+
+        def back(state):
+            if state == KazooState.CONNECTED:
+                ev.set()
+        client.add_listener(back)
+
+        deserialize_ev = threading.Event()
+
+        def bad_deserialize(bytes, offset):
+            deserialize_ev.set()
+            raise struct.error()
+
+        # force the connection to die but, on reconnect, cause the
+        # server response to be non-deserializable. ensure that the client
+        # continues to retry. This partially reproduces a rare bug seen
+        # in production.
+
+        with mock.patch.object(Connect, 'deserialize') as mock_deserialize:
+            mock_deserialize.side_effect = bad_deserialize
+            try:
+                handler.select = delayed_select
+                self.assertRaises(ConnectionLoss, client.create, path)
+            finally:
+                handler.select = _select
+            # the client reconnects automatically but the first attempt will
+            # hit a deserialize failure. wait for that.
+            deserialize_ev.wait(5)
+            eq_(deserialize_ev.is_set(), True)
+
+        # this time should succeed
+        ev.wait(5)
+        eq_(ev.is_set(), True)
+        eq_(client.exists(path), None)
+
+    def test_connection_close(self):
+        self.assertRaises(Exception, self.client.close)
+        self.client.stop()
+        self.client.close()
+
+        # should be able to restart
+        self.client.start()
+
+    def test_connection_pipe(self):
+        client = self.client
+        read_pipe = client._connection._read_pipe
+        write_pipe = client._connection._write_pipe
+
+        assert read_pipe is not None
+        assert write_pipe is not None
+
+        # stop client; the pipes should not yet be closed
+        client.stop()
+        assert read_pipe is not None
+        assert write_pipe is not None
+        os.fstat(read_pipe)
+        os.fstat(write_pipe)
+
+        # close client, and pipes should be closed
+        client.close()
+
+        try:
+            os.fstat(read_pipe)
+        except OSError as e:
+            if not e.errno == errno.EBADF:
+                raise
+        else:
+            self.fail("Expected read_pipe to be closed")
+
+        try:
+            os.fstat(write_pipe)
+        except OSError as e:
+            if not e.errno == errno.EBADF:
+                raise
+        else:
+            self.fail("Expected write_pipe to be closed")
+
+        # start client back up. should get a new, valid pipe
+        client.start()
+        read_pipe = client._connection._read_pipe
+        write_pipe = client._connection._write_pipe
+
+        assert read_pipe is not None
+        assert write_pipe is not None
+        os.fstat(read_pipe)
+        os.fstat(write_pipe)
+
+    def test_dirty_pipe(self):
+        client = self.client
+        read_pipe = client._connection._read_pipe
+        write_pipe = client._connection._write_pipe
+
+        # add a stray byte to the pipe and ensure that doesn't
+        # blow up client. simulates case where some error leaves
+        # a byte in the pipe which doesn't correspond to the
+        # request queue.
+        os.write(write_pipe, b'\0')
+
+        # eventually this byte should disappear from pipe
+        wait(lambda: client.handler.select([read_pipe], [], [], 0)[0] == [])
+
+
+class TestConnectionDrop(KazooTestCase):
+    def test_connection_dropped(self):
+        ev = threading.Event()
+
+        def back(state):
+            if state == KazooState.CONNECTED:
+                ev.set()
+
+        # create a node, start writing a large value, then drop the connection
+        path = "/" + uuid.uuid4().hex
+        self.client.create(path)
+        self.client.add_listener(back)
+        result = self.client.set_async(path, b'a' * 1000 * 1024)
+        self.client._call(_CONNECTION_DROP, None)
+
+        self.assertRaises(ConnectionLoss, result.get)
+        # we have a working connection to a new node
+        ev.wait(30)
+        eq_(ev.is_set(), True)
+
+
+class TestReadOnlyMode(KazooTestCase):
+
+    def setUp(self):
+        self.setup_zookeeper(read_only=True)
+        skip = False
+        if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+            skip = True
+        elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+            skip = False
+        else:
+            ver = self.client.server_version()
+            if ver[1] < 4:
+                skip = True
+        if skip:
+            raise SkipTest("Must use Zookeeper 3.4 or above")
+
+    def tearDown(self):
+        self.client.stop()
+
+    def test_read_only(self):
+        from kazoo.exceptions import NotReadOnlyCallError
+        from kazoo.protocol.states import KeeperState
+
+        client = self.client
+        states = []
+        ev = threading.Event()
+
+        @client.add_listener
+        def listen(state):
+            states.append(state)
+            if client.client_state == KeeperState.CONNECTED_RO:
+                ev.set()
+        try:
+            self.cluster[1].stop()
+            self.cluster[2].stop()
+            ev.wait(6)
+            eq_(ev.is_set(), True)
+            eq_(client.client_state, KeeperState.CONNECTED_RO)
+
+            # Test read only command
+            eq_(client.get_children('/'), [])
+
+            # Test error with write command
+            @raises(NotReadOnlyCallError)
+            def testit():
+                client.create('/fred')
+            testit()
+
+            # Wait for a ping
+            time.sleep(15)
+        finally:
+            client.remove_listener(listen)
+            self.cluster[1].run()
+            self.cluster[2].run()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_counter.py b/slider-agent/src/main/python/kazoo/tests/test_counter.py
new file mode 100644
index 0000000..b0361d0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_counter.py
@@ -0,0 +1,36 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooCounterTests(KazooTestCase):
+
+    def _makeOne(self, **kw):
+        path = "/" + uuid.uuid4().hex
+        return self.client.Counter(path, **kw)
+
+    def test_int_counter(self):
+        counter = self._makeOne()
+        eq_(counter.value, 0)
+        counter += 2
+        counter + 1
+        eq_(counter.value, 3)
+        counter -= 3
+        counter - 1
+        eq_(counter.value, -1)
+
+    def test_float_counter(self):
+        counter = self._makeOne(default=0.0)
+        eq_(counter.value, 0.0)
+        counter += 2.1
+        eq_(counter.value, 2.1)
+        counter -= 3.1
+        eq_(counter.value, -1.0)
+
+    def test_errors(self):
+        counter = self._makeOne()
+        self.assertRaises(TypeError, counter.__add__, 2.1)
+        self.assertRaises(TypeError, counter.__add__, b"a")
diff --git a/slider-agent/src/main/python/kazoo/tests/test_election.py b/slider-agent/src/main/python/kazoo/tests/test_election.py
new file mode 100644
index 0000000..a9610bf
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_election.py
@@ -0,0 +1,140 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import sys
+import threading
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+
+
+class UniqueError(Exception):
+    """Error raised only by test leader function
+    """
+
+
+class KazooElectionTests(KazooTestCase):
+    def setUp(self):
+        super(KazooElectionTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+
+        self.condition = threading.Condition()
+
+        # election contenders set these when elected. The exit event is set by
+        # the test to make the leader exit.
+        self.leader_id = None
+        self.exit_event = None
+
+        # tests set this before the event to make the leader raise an error
+        self.raise_exception = False
+
+        # set by a worker thread when an unexpected error is hit.
+        # better way to do this?
+        self.thread_exc_info = None
+
+    def _spawn_contender(self, contender_id, election):
+        thread = threading.Thread(target=self._election_thread,
+            args=(contender_id, election))
+        thread.daemon = True
+        thread.start()
+        return thread
+
+    def _election_thread(self, contender_id, election):
+        try:
+            election.run(self._leader_func, contender_id)
+        except UniqueError:
+            if not self.raise_exception:
+                self.thread_exc_info = sys.exc_info()
+        except Exception:
+            self.thread_exc_info = sys.exc_info()
+        else:
+            if self.raise_exception:
+                e = Exception("expected leader func to raise exception")
+                self.thread_exc_info = (Exception, e, None)
+
+    def _leader_func(self, name):
+        exit_event = threading.Event()
+        with self.condition:
+            self.exit_event = exit_event
+            self.leader_id = name
+            self.condition.notify_all()
+
+        exit_event.wait(45)
+        if self.raise_exception:
+            raise UniqueError("expected error in the leader function")
+
+    def _check_thread_error(self):
+        if self.thread_exc_info:
+            t, o, tb = self.thread_exc_info
+            raise t(o)
+
+    def test_election(self):
+        elections = {}
+        threads = {}
+        for _ in range(3):
+            contender = "c" + uuid.uuid4().hex
+            elections[contender] = self.client.Election(self.path, contender)
+            threads[contender] = self._spawn_contender(contender,
+                elections[contender])
+
+        # wait for a leader to be elected
+        times = 0
+        with self.condition:
+            while not self.leader_id:
+                self.condition.wait(5)
+                times += 1
+                if times > 5:
+                    raise Exception("Still not a leader: lid: %s",
+                                    self.leader_id)
+
+        election = self.client.Election(self.path)
+
+        # make sure all contenders are in the pool
+        wait(lambda: len(election.contenders()) == len(elections))
+        contenders = election.contenders()
+
+        eq_(set(contenders), set(elections.keys()))
+
+        # first one in list should be leader
+        first_leader = contenders[0]
+        eq_(first_leader, self.leader_id)
+
+        # tell second one to cancel election. should never get elected.
+        elections[contenders[1]].cancel()
+
+        # make leader exit. third contender should be elected.
+        self.exit_event.set()
+        with self.condition:
+            while self.leader_id == first_leader:
+                self.condition.wait(45)
+        eq_(self.leader_id, contenders[2])
+        self._check_thread_error()
+
+        # make first contender re-enter the race
+        threads[first_leader].join()
+        threads[first_leader] = self._spawn_contender(first_leader,
+            elections[first_leader])
+
+        # contender set should now be the current leader plus the first leader
+        wait(lambda: len(election.contenders()) == 2)
+        contenders = election.contenders()
+        eq_(set(contenders), set([self.leader_id, first_leader]))
+
+        # make current leader raise an exception. first should be reelected
+        self.raise_exception = True
+        self.exit_event.set()
+        with self.condition:
+            while self.leader_id != first_leader:
+                self.condition.wait(45)
+        eq_(self.leader_id, first_leader)
+        self._check_thread_error()
+
+        self.exit_event.set()
+        for thread in threads.values():
+            thread.join()
+        self._check_thread_error()
+
+    def test_bad_func(self):
+        election = self.client.Election(self.path)
+        self.assertRaises(ValueError, election.run, "not a callable")
diff --git a/slider-agent/src/main/python/kazoo/tests/test_exceptions.py b/slider-agent/src/main/python/kazoo/tests/test_exceptions.py
new file mode 100644
index 0000000..e469089
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_exceptions.py
@@ -0,0 +1,23 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+from unittest import TestCase
+
+
+class ExceptionsTestCase(TestCase):
+
+    def _get(self):
+        from kazoo import exceptions
+        return exceptions
+
+    def test_backwards_alias(self):
+        module = self._get()
+        self.assertTrue(getattr(module, 'NoNodeException'))
+        self.assertIs(module.NoNodeException, module.NoNodeError)
+
+    def test_exceptions_code(self):
+        module = self._get()
+        exc_8 = module.EXCEPTIONS[-8]
+        self.assertTrue(isinstance(exc_8(), module.BadArgumentsError))
+
+    def test_invalid_code(self):
+        module = self._get()
+        self.assertRaises(RuntimeError, module.EXCEPTIONS.__getitem__, 666)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py b/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py
new file mode 100644
index 0000000..71d9727
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_gevent_handler.py
@@ -0,0 +1,161 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose import SkipTest
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.client import KazooClient
+from kazoo.exceptions import NoNodeError
+from kazoo.protocol.states import Callback
+from kazoo.testing import KazooTestCase
+from kazoo.tests import test_client
+
+
+class TestGeventHandler(unittest.TestCase):
+
+    def setUp(self):
+        try:
+            import gevent
+        except ImportError:
+            raise SkipTest('gevent not available.')
+
+    def _makeOne(self, *args):
+        from kazoo.handlers.gevent import SequentialGeventHandler
+        return SequentialGeventHandler(*args)
+
+    def _getAsync(self, *args):
+        from kazoo.handlers.gevent import AsyncResult
+        return AsyncResult
+
+    def _getEvent(self):
+        from gevent.event import Event
+        return Event
+
+    def test_proper_threading(self):
+        h = self._makeOne()
+        h.start()
+        assert isinstance(h.event_object(), self._getEvent())
+
+    def test_matching_async(self):
+        h = self._makeOne()
+        h.start()
+        async_result = self._getAsync()
+        assert isinstance(h.async_result(), async_result)
+
+    def test_exception_raising(self):
+        h = self._makeOne()
+
+        @raises(h.timeout_exception)
+        def testit():
+            raise h.timeout_exception("This is a timeout")
+        testit()
+
+    def test_exception_in_queue(self):
+        h = self._makeOne()
+        h.start()
+        ev = self._getEvent()()
+
+        def func():
+            ev.set()
+            raise ValueError('bang')
+
+        call1 = Callback('completion', func, ())
+        h.dispatch_callback(call1)
+        ev.wait()
+
+    def test_queue_empty_exception(self):
+        from gevent.queue import Empty
+        h = self._makeOne()
+        h.start()
+        ev = self._getEvent()()
+
+        def func():
+            ev.set()
+            raise Empty()
+
+        call1 = Callback('completion', func, ())
+        h.dispatch_callback(call1)
+        ev.wait()
+
+
+class TestBasicGeventClient(KazooTestCase):
+
+    def setUp(self):
+        try:
+            import gevent
+        except ImportError:
+            raise SkipTest('gevent not available.')
+        KazooTestCase.setUp(self)
+
+    def _makeOne(self, *args):
+        from kazoo.handlers.gevent import SequentialGeventHandler
+        return SequentialGeventHandler(*args)
+
+    def _getEvent(self):
+        from gevent.event import Event
+        return Event
+
+    def test_start(self):
+        client = self._get_client(handler=self._makeOne())
+        client.start()
+        self.assertEqual(client.state, 'CONNECTED')
+        client.stop()
+
+    def test_start_stop_double(self):
+        client = self._get_client(handler=self._makeOne())
+        client.start()
+        self.assertEqual(client.state, 'CONNECTED')
+        client.handler.start()
+        client.handler.stop()
+        client.stop()
+
+    def test_basic_commands(self):
+        client = self._get_client(handler=self._makeOne())
+        client.start()
+        self.assertEqual(client.state, 'CONNECTED')
+        client.create('/anode', 'fred')
+        eq_(client.get('/anode')[0], 'fred')
+        eq_(client.delete('/anode'), True)
+        eq_(client.exists('/anode'), None)
+        client.stop()
+
+    def test_failures(self):
+        client = self._get_client(handler=self._makeOne())
+        client.start()
+        self.assertRaises(NoNodeError, client.get, '/none')
+        client.stop()
+
+    def test_data_watcher(self):
+        client = self._get_client(handler=self._makeOne())
+        client.start()
+        client.ensure_path('/some/node')
+        ev = self._getEvent()()
+
+        @client.DataWatch('/some/node')
+        def changed(d, stat):
+            ev.set()
+
+        ev.wait()
+        ev.clear()
+        client.set('/some/node', 'newvalue')
+        ev.wait()
+        client.stop()
+
+
+class TestGeventClient(test_client.TestClient):
+
+    def setUp(self):
+        try:
+            import gevent
+        except ImportError:
+            raise SkipTest('gevent not available.')
+        KazooTestCase.setUp(self)
+
+    def _makeOne(self, *args):
+        from kazoo.handlers.gevent import SequentialGeventHandler
+        return SequentialGeventHandler(*args)
+
+    def _get_client(self, **kwargs):
+        kwargs["handler"] = self._makeOne()
+        return KazooClient(self.hosts, **kwargs)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_lock.py b/slider-agent/src/main/python/kazoo/tests/test_lock.py
new file mode 100644
index 0000000..6dd15b0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_lock.py
@@ -0,0 +1,518 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import threading
+
+from nose.tools import eq_, ok_
+
+from kazoo.exceptions import CancelledError
+from kazoo.exceptions import LockTimeout
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import wait
+
+
+class KazooLockTests(KazooTestCase):
+    def setUp(self):
+        super(KazooLockTests, self).setUp()
+        self.lockpath = "/" + uuid.uuid4().hex
+
+        self.condition = threading.Condition()
+        self.released = threading.Event()
+        self.active_thread = None
+        self.cancelled_threads = []
+
+    def _thread_lock_acquire_til_event(self, name, lock, event):
+        try:
+            with lock:
+                with self.condition:
+                    eq_(self.active_thread, None)
+                    self.active_thread = name
+                    self.condition.notify_all()
+
+                event.wait()
+
+                with self.condition:
+                    eq_(self.active_thread, name)
+                    self.active_thread = None
+                    self.condition.notify_all()
+            self.released.set()
+        except CancelledError:
+            with self.condition:
+                self.cancelled_threads.append(name)
+                self.condition.notify_all()
+
+    def test_lock_one(self):
+        lock_name = uuid.uuid4().hex
+        lock = self.client.Lock(self.lockpath, lock_name)
+        event = threading.Event()
+
+        thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+            args=(lock_name, lock, event))
+        thread.start()
+
+        lock2_name = uuid.uuid4().hex
+        anotherlock = self.client.Lock(self.lockpath, lock2_name)
+
+        # wait for any contender to show up on the lock
+        wait(anotherlock.contenders)
+        eq_(anotherlock.contenders(), [lock_name])
+
+        with self.condition:
+            while self.active_thread != lock_name:
+                self.condition.wait()
+
+        # release the lock
+        event.set()
+
+        with self.condition:
+            while self.active_thread:
+                self.condition.wait()
+        self.released.wait()
+        thread.join()
+
+    def test_lock(self):
+        threads = []
+        names = ["contender" + str(i) for i in range(5)]
+
+        contender_bits = {}
+
+        for name in names:
+            e = threading.Event()
+
+            l = self.client.Lock(self.lockpath, name)
+            t = threading.Thread(target=self._thread_lock_acquire_til_event,
+                args=(name, l, e))
+            contender_bits[name] = (t, e)
+            threads.append(t)
+
+        # acquire the lock ourselves first to make the others line up
+        lock = self.client.Lock(self.lockpath, "test")
+        lock.acquire()
+
+        for t in threads:
+            t.start()
+
+        # wait for everyone to line up on the lock
+        wait(lambda: len(lock.contenders()) == 6)
+        contenders = lock.contenders()
+
+        eq_(contenders[0], "test")
+        contenders = contenders[1:]
+        remaining = list(contenders)
+
+        # release the lock and contenders should claim it in order
+        lock.release()
+
+        for contender in contenders:
+            thread, event = contender_bits[contender]
+
+            with self.condition:
+                while not self.active_thread:
+                    self.condition.wait()
+                eq_(self.active_thread, contender)
+
+            eq_(lock.contenders(), remaining)
+            remaining = remaining[1:]
+
+            event.set()
+
+            with self.condition:
+                while self.active_thread:
+                    self.condition.wait()
+        for thread in threads:
+            thread.join()
+
+    def test_lock_reconnect(self):
+        event = threading.Event()
+        other_lock = self.client.Lock(self.lockpath, 'contender')
+        thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+                                  args=('contender', other_lock, event))
+
+        # acquire the lock ourselves first to make the contender line up
+        lock = self.client.Lock(self.lockpath, "test")
+        lock.acquire()
+
+        thread.start()
+        # wait for the contender to line up on the lock
+        wait(lambda: len(lock.contenders()) == 2)
+        eq_(lock.contenders(), ['test', 'contender'])
+
+        self.expire_session()
+
+        lock.release()
+
+        with self.condition:
+            while not self.active_thread:
+                self.condition.wait()
+            eq_(self.active_thread, 'contender')
+
+        event.set()
+        thread.join()
+
+    def test_lock_non_blocking(self):
+        lock_name = uuid.uuid4().hex
+        lock = self.client.Lock(self.lockpath, lock_name)
+        event = threading.Event()
+
+        thread = threading.Thread(target=self._thread_lock_acquire_til_event,
+            args=(lock_name, lock, event))
+        thread.start()
+
+        lock1 = self.client.Lock(self.lockpath, lock_name)
+
+        # wait for the thread to acquire the lock
+        with self.condition:
+            if not self.active_thread:
+                self.condition.wait(5)
+
+        ok_(not lock1.acquire(blocking=False))
+        eq_(lock.contenders(), [lock_name])  # just one - itself
+
+        event.set()
+        thread.join()
+
+    def test_lock_fail_first_call(self):
+        event1 = threading.Event()
+        lock1 = self.client.Lock(self.lockpath, "one")
+        thread1 = threading.Thread(target=self._thread_lock_acquire_til_event,
+            args=("one", lock1, event1))
+        thread1.start()
+
+        # wait for this thread to acquire the lock
+        with self.condition:
+            if not self.active_thread:
+                self.condition.wait(5)
+                eq_(self.active_thread, "one")
+        eq_(lock1.contenders(), ["one"])
+        event1.set()
+        thread1.join()
+
+    def test_lock_cancel(self):
+        event1 = threading.Event()
+        lock1 = self.client.Lock(self.lockpath, "one")
+        thread1 = threading.Thread(target=self._thread_lock_acquire_til_event,
+            args=("one", lock1, event1))
+        thread1.start()
+
+        # wait for this thread to acquire the lock
+        with self.condition:
+            if not self.active_thread:
+                self.condition.wait(5)
+                eq_(self.active_thread, "one")
+
+        client2 = self._get_client()
+        client2.start()
+        event2 = threading.Event()
+        lock2 = client2.Lock(self.lockpath, "two")
+        thread2 = threading.Thread(target=self._thread_lock_acquire_til_event,
+            args=("two", lock2, event2))
+        thread2.start()
+
+        # this one should block in acquire. check that it is a contender
+        wait(lambda: len(lock2.contenders()) > 1)
+        eq_(lock2.contenders(), ["one", "two"])
+
+        lock2.cancel()
+        with self.condition:
+            if not "two" in self.cancelled_threads:
+                self.condition.wait()
+                assert "two" in self.cancelled_threads
+
+        eq_(lock2.contenders(), ["one"])
+
+        thread2.join()
+        event1.set()
+        thread1.join()
+        client2.stop()
+
+    def test_lock_double_calls(self):
+        lock1 = self.client.Lock(self.lockpath, "one")
+        lock1.acquire()
+        lock1.acquire()
+        lock1.release()
+        lock1.release()
+
+    def test_lock_reacquire(self):
+        lock = self.client.Lock(self.lockpath, "one")
+        lock.acquire()
+        lock.release()
+        lock.acquire()
+        lock.release()
+
+    def test_lock_timeout(self):
+        timeout = 3
+        e = threading.Event()
+        started = threading.Event()
+
+        # In the background thread, acquire the lock and wait thrice the time
+        # that the main thread is going to wait to acquire the lock.
+        lock1 = self.client.Lock(self.lockpath, "one")
+
+        def _thread(lock, event, timeout):
+            with lock:
+                started.set()
+                event.wait(timeout)
+                if not event.isSet():
+                    # Eventually fail to avoid hanging the tests
+                    self.fail("lock2 never timed out")
+
+        t = threading.Thread(target=_thread, args=(lock1, e, timeout * 3))
+        t.start()
+
+        # Start the main thread's kazoo client and try to acquire the lock
+        # but give up after `timeout` seconds
+        client2 = self._get_client()
+        client2.start()
+        started.wait(5)
+        self.assertTrue(started.isSet())
+        lock2 = client2.Lock(self.lockpath, "two")
+        try:
+            lock2.acquire(timeout=timeout)
+        except LockTimeout:
+            # A timeout is the behavior we're expecting, since the background
+            # thread should still be holding onto the lock
+            pass
+        else:
+            self.fail("Main thread unexpectedly acquired the lock")
+        finally:
+            # Cleanup
+            e.set()
+            t.join()
+            client2.stop()
+
+
+class TestSemaphore(KazooTestCase):
+    def setUp(self):
+        super(TestSemaphore, self).setUp()
+        self.lockpath = "/" + uuid.uuid4().hex
+
+        self.condition = threading.Condition()
+        self.released = threading.Event()
+        self.active_thread = None
+        self.cancelled_threads = []
+
+    def test_basic(self):
+        sem1 = self.client.Semaphore(self.lockpath)
+        sem1.acquire()
+        sem1.release()
+
+    def test_lock_one(self):
+        sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+        sem2 = self.client.Semaphore(self.lockpath, max_leases=1)
+        started = threading.Event()
+        event = threading.Event()
+
+        sem1.acquire()
+
+        def sema_one():
+            started.set()
+            with sem2:
+                event.set()
+
+        thread = threading.Thread(target=sema_one, args=())
+        thread.start()
+        started.wait(10)
+
+        self.assertFalse(event.is_set())
+
+        sem1.release()
+        event.wait(10)
+        self.assertTrue(event.is_set())
+        thread.join()
+
+    def test_non_blocking(self):
+        sem1 = self.client.Semaphore(
+            self.lockpath, identifier='sem1', max_leases=2)
+        sem2 = self.client.Semaphore(
+            self.lockpath, identifier='sem2', max_leases=2)
+        sem3 = self.client.Semaphore(
+            self.lockpath, identifier='sem3', max_leases=2)
+
+        sem1.acquire()
+        sem2.acquire()
+        ok_(not sem3.acquire(blocking=False))
+        eq_(set(sem1.lease_holders()), set(['sem1', 'sem2']))
+        sem2.release()
+        # the next line isn't required, but avoids timing issues in tests
+        sem3.acquire()
+        eq_(set(sem1.lease_holders()), set(['sem1', 'sem3']))
+        sem1.release()
+        sem3.release()
+
+    def test_non_blocking_release(self):
+        sem1 = self.client.Semaphore(
+            self.lockpath, identifier='sem1', max_leases=1)
+        sem2 = self.client.Semaphore(
+            self.lockpath, identifier='sem2', max_leases=1)
+        sem1.acquire()
+        sem2.acquire(blocking=False)
+
+        # make sure there's no shutdown / cleanup error
+        sem1.release()
+        sem2.release()
+
+    def test_holders(self):
+        started = threading.Event()
+        event = threading.Event()
+
+        def sema_one():
+            with self.client.Semaphore(self.lockpath, 'fred', max_leases=1):
+                started.set()
+                event.wait()
+
+        thread = threading.Thread(target=sema_one, args=())
+        thread.start()
+        started.wait()
+        sem1 = self.client.Semaphore(self.lockpath)
+        holders = sem1.lease_holders()
+        eq_(holders, ['fred'])
+        event.set()
+        thread.join()
+
+    def test_semaphore_cancel(self):
+        sem1 = self.client.Semaphore(self.lockpath, 'fred', max_leases=1)
+        sem2 = self.client.Semaphore(self.lockpath, 'george', max_leases=1)
+        sem1.acquire()
+        started = threading.Event()
+        event = threading.Event()
+
+        def sema_one():
+            started.set()
+            try:
+                with sem2:
+                    started.set()
+            except CancelledError:
+                event.set()
+
+        thread = threading.Thread(target=sema_one, args=())
+        thread.start()
+        started.wait()
+        eq_(sem1.lease_holders(), ['fred'])
+        eq_(event.is_set(), False)
+        sem2.cancel()
+        event.wait()
+        eq_(event.is_set(), True)
+        thread.join()
+
+    def test_multiple_acquire_and_release(self):
+        sem1 = self.client.Semaphore(self.lockpath, 'fred', max_leases=1)
+        sem1.acquire()
+        sem1.acquire()
+
+        eq_(True, sem1.release())
+        eq_(False, sem1.release())
+
+    def test_handle_session_loss(self):
+        expire_semaphore = self.client.Semaphore(self.lockpath, 'fred',
+                                                 max_leases=1)
+
+        client = self._get_client()
+        client.start()
+        lh_semaphore = client.Semaphore(self.lockpath, 'george', max_leases=1)
+        lh_semaphore.acquire()
+
+        started = threading.Event()
+        event = threading.Event()
+        event2 = threading.Event()
+
+        def sema_one():
+            started.set()
+            with expire_semaphore:
+                event.set()
+                event2.wait()
+
+        thread = threading.Thread(target=sema_one, args=())
+        thread.start()
+
+        started.wait()
+        eq_(lh_semaphore.lease_holders(), ['george'])
+
+        # Fired in a separate thread to make sure we can see the effect
+        expired = threading.Event()
+
+        def expire():
+            self.expire_session()
+            expired.set()
+
+        thread = threading.Thread(target=expire, args=())
+        thread.start()
+        expire_semaphore.wake_event.wait()
+        expired.wait()
+
+        lh_semaphore.release()
+        client.stop()
+
+        event.wait(5)
+        eq_(expire_semaphore.lease_holders(), ['fred'])
+        event2.set()
+        thread.join()
+
+    def test_inconsistent_max_leases(self):
+        sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+        sem2 = self.client.Semaphore(self.lockpath, max_leases=2)
+
+        sem1.acquire()
+        self.assertRaises(ValueError, sem2.acquire)
+
+    def test_inconsistent_max_leases_other_data(self):
+        sem1 = self.client.Semaphore(self.lockpath, max_leases=1)
+        sem2 = self.client.Semaphore(self.lockpath, max_leases=2)
+
+        self.client.ensure_path(self.lockpath)
+        self.client.set(self.lockpath, b'a$')
+
+        sem1.acquire()
+        # sem2 thinks it's ok to have two lease holders
+        ok_(sem2.acquire(blocking=False))
+
+    def test_reacquire(self):
+        lock = self.client.Semaphore(self.lockpath)
+        lock.acquire()
+        lock.release()
+        lock.acquire()
+        lock.release()
+
+    def test_acquire_after_cancelled(self):
+        lock = self.client.Semaphore(self.lockpath)
+        self.assertTrue(lock.acquire())
+        self.assertTrue(lock.release())
+        lock.cancel()
+        self.assertTrue(lock.cancelled)
+        self.assertTrue(lock.acquire())
+
+    def test_timeout(self):
+        timeout = 3
+        e = threading.Event()
+        started = threading.Event()
+
+        # In the background thread, acquire the lock and wait thrice the time
+        # that the main thread is going to wait to acquire the lock.
+        sem1 = self.client.Semaphore(self.lockpath, "one")
+
+        def _thread(sem, event, timeout):
+            with sem:
+                started.set()
+                event.wait(timeout)
+                if not event.isSet():
+                    # Eventually fail to avoid hanging the tests
+                    self.fail("sem2 never timed out")
+
+        t = threading.Thread(target=_thread, args=(sem1, e, timeout * 3))
+        t.start()
+
+        # Start the main thread's kazoo client and try to acquire the lock
+        # but give up after `timeout` seconds
+        client2 = self._get_client()
+        client2.start()
+        started.wait(5)
+        self.assertTrue(started.isSet())
+        sem2 = client2.Semaphore(self.lockpath, "two")
+        try:
+            sem2.acquire(timeout=timeout)
+        except LockTimeout:
+            # Expected: the background thread still holds the only lease
+            e.set()
+        else:
+            e.set(); self.fail("Main thread unexpectedly acquired the lease")
+        finally:
+            # Cleanup
+            t.join()
+            client2.stop()
diff --git a/slider-agent/src/main/python/kazoo/tests/test_partitioner.py b/slider-agent/src/main/python/kazoo/tests/test_partitioner.py
new file mode 100644
index 0000000..1a4f205
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_partitioner.py
@@ -0,0 +1,93 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+import time
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+from kazoo.recipe.partitioner import PartitionState
+
+
+class KazooPartitionerTests(KazooTestCase):
+    def setUp(self):
+        super(KazooPartitionerTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+
+    def test_party_of_one(self):
+        partitioner = self.client.SetPartitioner(
+            self.path, set=(1, 2, 3), time_boundary=0.2)
+        partitioner.wait_for_acquire(14)
+        eq_(partitioner.state, PartitionState.ACQUIRED)
+        eq_(list(partitioner), [1, 2, 3])
+        partitioner.finish()
+
+    def test_party_of_two(self):
+        partitioners = [self.client.SetPartitioner(self.path, (1, 2),
+                        identifier="p%s" % i, time_boundary=0.2)
+                        for i in range(2)]
+
+        partitioners[0].wait_for_acquire(14)
+        partitioners[1].wait_for_acquire(14)
+        eq_(list(partitioners[0]), [1])
+        eq_(list(partitioners[1]), [2])
+        partitioners[0].finish()
+        time.sleep(0.1)
+        eq_(partitioners[1].release, True)
+        partitioners[1].finish()
+
+    def test_party_expansion(self):
+        partitioners = [self.client.SetPartitioner(self.path, (1, 2, 3),
+                        identifier="p%s" % i, time_boundary=0.2)
+                        for i in range(2)]
+
+        partitioners[0].wait_for_acquire(14)
+        partitioners[1].wait_for_acquire(14)
+        eq_(partitioners[0].state, PartitionState.ACQUIRED)
+        eq_(partitioners[1].state, PartitionState.ACQUIRED)
+
+        eq_(list(partitioners[0]), [1, 3])
+        eq_(list(partitioners[1]), [2])
+
+        # Add another partition, wait till they settle
+        partitioners.append(self.client.SetPartitioner(self.path, (1, 2, 3),
+                            identifier="p2", time_boundary=0.2))
+        time.sleep(0.1)
+        eq_(partitioners[0].release, True)
+        for p in partitioners[:-1]:
+            p.release_set()
+
+        for p in partitioners:
+            p.wait_for_acquire(14)
+
+        eq_(list(partitioners[0]), [1])
+        eq_(list(partitioners[1]), [2])
+        eq_(list(partitioners[2]), [3])
+
+        for p in partitioners:
+            p.finish()
+
+    def test_more_members_than_set_items(self):
+        partitioners = [self.client.SetPartitioner(self.path, (1,),
+                        identifier="p%s" % i, time_boundary=0.2)
+                        for i in range(2)]
+
+        partitioners[0].wait_for_acquire(14)
+        partitioners[1].wait_for_acquire(14)
+        eq_(partitioners[0].state, PartitionState.ACQUIRED)
+        eq_(partitioners[1].state, PartitionState.ACQUIRED)
+
+        eq_(list(partitioners[0]), [1])
+        eq_(list(partitioners[1]), [])
+
+        for p in partitioners:
+            p.finish()
+
+    def test_party_session_failure(self):
+        partitioner = self.client.SetPartitioner(
+            self.path, set=(1, 2, 3), time_boundary=0.2)
+        partitioner.wait_for_acquire(14)
+        eq_(partitioner.state, PartitionState.ACQUIRED)
+        # simulate session failure
+        partitioner._fail_out()
+        partitioner.release_set()
+        self.assertTrue(partitioner.failed)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_party.py b/slider-agent/src/main/python/kazoo/tests/test_party.py
new file mode 100644
index 0000000..61400ae
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_party.py
@@ -0,0 +1,85 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose.tools import eq_
+
+from kazoo.testing import KazooTestCase
+
+
+class KazooPartyTests(KazooTestCase):
+    def setUp(self):
+        super(KazooPartyTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+
+    def test_party(self):
+        parties = [self.client.Party(self.path, "p%s" % i)
+                   for i in range(5)]
+
+        one_party = parties[0]
+
+        eq_(list(one_party), [])
+        eq_(len(one_party), 0)
+
+        participants = set()
+        for party in parties:
+            party.join()
+            participants.add(party.data.decode('utf-8'))
+
+            eq_(set(party), participants)
+            eq_(len(party), len(participants))
+
+        for party in parties:
+            party.leave()
+            participants.remove(party.data.decode('utf-8'))
+
+            eq_(set(party), participants)
+            eq_(len(party), len(participants))
+
+    def test_party_reuse_node(self):
+        party = self.client.Party(self.path, "p1")
+        self.client.ensure_path(self.path)
+        self.client.create(party.create_path)
+        party.join()
+        self.assertTrue(party.participating)
+        party.leave()
+        self.assertFalse(party.participating)
+        self.assertEqual(len(party), 0)
+
+    def test_party_vanishing_node(self):
+        party = self.client.Party(self.path, "p1")
+        party.join()
+        self.assertTrue(party.participating)
+        self.client.delete(party.create_path)
+        party.leave()
+        self.assertFalse(party.participating)
+        self.assertEqual(len(party), 0)
+
+
+class KazooShallowPartyTests(KazooTestCase):
+    def setUp(self):
+        super(KazooShallowPartyTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+
+    def test_party(self):
+        parties = [self.client.ShallowParty(self.path, "p%s" % i)
+                   for i in range(5)]
+
+        one_party = parties[0]
+
+        eq_(list(one_party), [])
+        eq_(len(one_party), 0)
+
+        participants = set()
+        for party in parties:
+            party.join()
+            participants.add(party.data.decode('utf-8'))
+
+            eq_(set(party), participants)
+            eq_(len(party), len(participants))
+
+        for party in parties:
+            party.leave()
+            participants.remove(party.data.decode('utf-8'))
+
+            eq_(set(party), participants)
+            eq_(len(party), len(participants))
diff --git a/slider-agent/src/main/python/kazoo/tests/test_paths.py b/slider-agent/src/main/python/kazoo/tests/test_paths.py
new file mode 100644
index 0000000..c9064bb
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_paths.py
@@ -0,0 +1,99 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import sys
+from unittest import TestCase
+
+from kazoo.protocol import paths
+
+
+if sys.version_info > (3, ):  # pragma: nocover
+    def u(s):
+        return s
+else:  # pragma: nocover
+    def u(s):
+        return unicode(s, "unicode_escape")
+
+
+class NormPathTestCase(TestCase):
+
+    def test_normpath(self):
+        self.assertEqual(paths.normpath('/a/b'), '/a/b')
+
+    def test_normpath_empty(self):
+        self.assertEqual(paths.normpath(''), '')
+
+    def test_normpath_unicode(self):
+        self.assertEqual(paths.normpath(u('/\xe4/b')), u('/\xe4/b'))
+
+    def test_normpath_dots(self):
+        self.assertEqual(paths.normpath('/a./b../c'), '/a./b../c')
+
+    def test_normpath_slash(self):
+        self.assertEqual(paths.normpath('/'), '/')
+
+    def test_normpath_multiple_slashes(self):
+        self.assertEqual(paths.normpath('//'), '/')
+        self.assertEqual(paths.normpath('//a/b'), '/a/b')
+        self.assertEqual(paths.normpath('/a//b//'), '/a/b')
+        self.assertEqual(paths.normpath('//a////b///c/'), '/a/b/c')
+
+    def test_normpath_relative(self):
+        self.assertRaises(ValueError, paths.normpath, './a/b')
+        self.assertRaises(ValueError, paths.normpath, '/a/../b')
+
+
+class JoinTestCase(TestCase):
+
+    def test_join(self):
+        self.assertEqual(paths.join('/a'), '/a')
+        self.assertEqual(paths.join('/a', 'b/'), '/a/b/')
+        self.assertEqual(paths.join('/a', 'b', 'c'), '/a/b/c')
+
+    def test_join_empty(self):
+        self.assertEqual(paths.join(''), '')
+        self.assertEqual(paths.join('', 'a', 'b'), 'a/b')
+        self.assertEqual(paths.join('/a', '', 'b/', 'c'), '/a/b/c')
+
+    def test_join_absolute(self):
+        self.assertEqual(paths.join('/a/b', '/c'), '/c')
+
+
+class IsAbsTestCase(TestCase):
+
+    def test_isabs(self):
+        self.assertTrue(paths.isabs('/'))
+        self.assertTrue(paths.isabs('/a'))
+        self.assertTrue(paths.isabs('/a//b/c'))
+        self.assertTrue(paths.isabs('//a/b'))
+
+    def test_isabs_false(self):
+        self.assertFalse(paths.isabs(''))
+        self.assertFalse(paths.isabs('a/'))
+        self.assertFalse(paths.isabs('a/../'))
+
+
+class BaseNameTestCase(TestCase):
+
+    def test_basename(self):
+        self.assertEquals(paths.basename(''), '')
+        self.assertEquals(paths.basename('/'), '')
+        self.assertEquals(paths.basename('//a'), 'a')
+        self.assertEquals(paths.basename('//a/'), '')
+        self.assertEquals(paths.basename('/a/b.//c..'), 'c..')
+
+
+class PrefixRootTestCase(TestCase):
+
+    def test_prefix_root(self):
+        self.assertEquals(paths._prefix_root('/a/', 'b/c'), '/a/b/c')
+        self.assertEquals(paths._prefix_root('/a/b', 'c/d'), '/a/b/c/d')
+        self.assertEquals(paths._prefix_root('/a', '/b/c'), '/a/b/c')
+        self.assertEquals(paths._prefix_root('/a', '//b/c.'), '/a/b/c.')
+
+
+class NormRootTestCase(TestCase):
+
+    def test_norm_root(self):
+        self.assertEquals(paths._norm_root(''), '/')
+        self.assertEquals(paths._norm_root('/'), '/')
+        self.assertEquals(paths._norm_root('//a'), '/a')
+        self.assertEquals(paths._norm_root('//a./b'), '/a./b')
diff --git a/slider-agent/src/main/python/kazoo/tests/test_queue.py b/slider-agent/src/main/python/kazoo/tests/test_queue.py
new file mode 100644
index 0000000..4c13ca9
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_queue.py
@@ -0,0 +1,180 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import uuid
+
+from nose import SkipTest
+from nose.tools import eq_, ok_
+
+from kazoo.testing import KazooTestCase
+from kazoo.tests.util import TRAVIS_ZK_VERSION
+
+
+class KazooQueueTests(KazooTestCase):
+
+    def _makeOne(self):
+        path = "/" + uuid.uuid4().hex
+        return self.client.Queue(path)
+
+    def test_queue_validation(self):
+        queue = self._makeOne()
+        self.assertRaises(TypeError, queue.put, {})
+        self.assertRaises(TypeError, queue.put, b"one", b"100")
+        self.assertRaises(TypeError, queue.put, b"one", 10.0)
+        self.assertRaises(ValueError, queue.put, b"one", -100)
+        self.assertRaises(ValueError, queue.put, b"one", 100000)
+
+    def test_empty_queue(self):
+        queue = self._makeOne()
+        eq_(len(queue), 0)
+        self.assertTrue(queue.get() is None)
+        eq_(len(queue), 0)
+
+    def test_queue(self):
+        queue = self._makeOne()
+        queue.put(b"one")
+        queue.put(b"two")
+        queue.put(b"three")
+        eq_(len(queue), 3)
+
+        eq_(queue.get(), b"one")
+        eq_(queue.get(), b"two")
+        eq_(queue.get(), b"three")
+        eq_(len(queue), 0)
+
+    def test_priority(self):
+        queue = self._makeOne()
+        queue.put(b"four", priority=101)
+        queue.put(b"one", priority=0)
+        queue.put(b"two", priority=0)
+        queue.put(b"three", priority=10)
+
+        eq_(queue.get(), b"one")
+        eq_(queue.get(), b"two")
+        eq_(queue.get(), b"three")
+        eq_(queue.get(), b"four")
+
+
+class KazooLockingQueueTests(KazooTestCase):
+
+    def setUp(self):
+        KazooTestCase.setUp(self)
+        skip = False
+        if TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION < (3, 4):
+            skip = True
+        elif TRAVIS_ZK_VERSION and TRAVIS_ZK_VERSION >= (3, 4):
+            skip = False
+        else:
+            ver = self.client.server_version()
+            if ver[1] < 4:
+                skip = True
+        if skip:
+            raise SkipTest("Must use Zookeeper 3.4 or above")
+
+    def _makeOne(self):
+        path = "/" + uuid.uuid4().hex
+        return self.client.LockingQueue(path)
+
+    def test_queue_validation(self):
+        queue = self._makeOne()
+        self.assertRaises(TypeError, queue.put, {})
+        self.assertRaises(TypeError, queue.put, b"one", b"100")
+        self.assertRaises(TypeError, queue.put, b"one", 10.0)
+        self.assertRaises(ValueError, queue.put, b"one", -100)
+        self.assertRaises(ValueError, queue.put, b"one", 100000)
+        self.assertRaises(TypeError, queue.put_all, {})
+        self.assertRaises(TypeError, queue.put_all, [{}])
+        self.assertRaises(TypeError, queue.put_all, [b"one"], b"100")
+        self.assertRaises(TypeError, queue.put_all, [b"one"], 10.0)
+        self.assertRaises(ValueError, queue.put_all, [b"one"], -100)
+        self.assertRaises(ValueError, queue.put_all, [b"one"], 100000)
+
+    def test_empty_queue(self):
+        queue = self._makeOne()
+        eq_(len(queue), 0)
+        self.assertTrue(queue.get(0) is None)
+        eq_(len(queue), 0)
+
+    def test_queue(self):
+        queue = self._makeOne()
+        queue.put(b"one")
+        queue.put_all([b"two", b"three"])
+        eq_(len(queue), 3)
+
+        ok_(not queue.consume())
+        ok_(not queue.holds_lock())
+        eq_(queue.get(1), b"one")
+        ok_(queue.holds_lock())
+        # Without consuming, should return the same element
+        eq_(queue.get(1), b"one")
+        ok_(queue.consume())
+        ok_(not queue.holds_lock())
+        eq_(queue.get(1), b"two")
+        ok_(queue.holds_lock())
+        ok_(queue.consume())
+        ok_(not queue.holds_lock())
+        eq_(queue.get(1), b"three")
+        ok_(queue.holds_lock())
+        ok_(queue.consume())
+        ok_(not queue.holds_lock())
+        ok_(not queue.consume())
+        eq_(len(queue), 0)
+
+    def test_consume(self):
+        queue = self._makeOne()
+
+        queue.put(b"one")
+        ok_(not queue.consume())
+        queue.get(.1)
+        ok_(queue.consume())
+        ok_(not queue.consume())
+
+    def test_holds_lock(self):
+        queue = self._makeOne()
+
+        ok_(not queue.holds_lock())
+        queue.put(b"one")
+        queue.get(.1)
+        ok_(queue.holds_lock())
+        queue.consume()
+        ok_(not queue.holds_lock())
+
+    def test_priority(self):
+        queue = self._makeOne()
+        queue.put(b"four", priority=101)
+        queue.put(b"one", priority=0)
+        queue.put(b"two", priority=0)
+        queue.put(b"three", priority=10)
+
+        eq_(queue.get(1), b"one")
+        ok_(queue.consume())
+        eq_(queue.get(1), b"two")
+        ok_(queue.consume())
+        eq_(queue.get(1), b"three")
+        ok_(queue.consume())
+        eq_(queue.get(1), b"four")
+        ok_(queue.consume())
+
+    def test_concurrent_execution(self):
+        queue = self._makeOne()
+        value1 = []
+        value2 = []
+        value3 = []
+        event1 = self.client.handler.event_object()
+        event2 = self.client.handler.event_object()
+        event3 = self.client.handler.event_object()
+
+        def get_concurrently(value, event):
+            q = self.client.LockingQueue(queue.path)
+            value.append(q.get(.1))
+            event.set()
+
+        self.client.handler.spawn(get_concurrently, value1, event1)
+        self.client.handler.spawn(get_concurrently, value2, event2)
+        self.client.handler.spawn(get_concurrently, value3, event3)
+        queue.put(b"one")
+        event1.wait(.2)
+        event2.wait(.2)
+        event3.wait(.2)
+
+        result = value1 + value2 + value3
+        eq_(result.count(b"one"), 1)
+        eq_(result.count(None), 2)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_retry.py b/slider-agent/src/main/python/kazoo/tests/test_retry.py
new file mode 100644
index 0000000..84c8d41
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_retry.py
@@ -0,0 +1,78 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose.tools import eq_
+
+
+class TestRetrySleeper(unittest.TestCase):
+
+    def _pass(self):
+        pass
+
+    def _fail(self, times=1):
+        from kazoo.retry import ForceRetryError
+        scope = dict(times=0)
+
+        def inner():
+            if scope['times'] >= times:
+                pass
+            else:
+                scope['times'] += 1
+                raise ForceRetryError('Failed!')
+        return inner
+
+    def _makeOne(self, *args, **kwargs):
+        from kazoo.retry import KazooRetry
+        return KazooRetry(*args, **kwargs)
+
+    def test_reset(self):
+        retry = self._makeOne(delay=0, max_tries=2)
+        retry(self._fail())
+        eq_(retry._attempts, 1)
+        retry.reset()
+        eq_(retry._attempts, 0)
+
+    def test_too_many_tries(self):
+        from kazoo.retry import RetryFailedError
+        retry = self._makeOne(delay=0)
+        self.assertRaises(RetryFailedError, retry, self._fail(times=999))
+        eq_(retry._attempts, 1)
+
+    def test_maximum_delay(self):
+        def sleep_func(_time):
+            pass
+
+        retry = self._makeOne(delay=10, max_tries=100, sleep_func=sleep_func)
+        retry(self._fail(times=10))
+        self.assertTrue(retry._cur_delay < 4000, retry._cur_delay)
+        # gevent's sleep function is picky about the type
+        eq_(type(retry._cur_delay), float)
+
+    def test_copy(self):
+        _sleep = lambda t: None
+        retry = self._makeOne(sleep_func=_sleep)
+        rcopy = retry.copy()
+        self.assertTrue(rcopy.sleep_func is _sleep)
+
+
+class TestKazooRetry(unittest.TestCase):
+
+    def _makeOne(self, **kw):
+        from kazoo.retry import KazooRetry
+        return KazooRetry(**kw)
+
+    def test_connection_closed(self):
+        from kazoo.exceptions import ConnectionClosedError
+        retry = self._makeOne()
+
+        def testit():
+            raise ConnectionClosedError()
+        self.assertRaises(ConnectionClosedError, retry, testit)
+
+    def test_session_expired(self):
+        from kazoo.exceptions import SessionExpiredError
+        retry = self._makeOne(max_tries=1)
+
+        def testit():
+            raise SessionExpiredError()
+        self.assertRaises(Exception, retry, testit)
diff --git a/slider-agent/src/main/python/kazoo/tests/test_security.py b/slider-agent/src/main/python/kazoo/tests/test_security.py
new file mode 100644
index 0000000..587c265
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_security.py
@@ -0,0 +1,41 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import unittest
+
+from nose.tools import eq_
+from kazoo.security import Permissions
+
+
+class TestACL(unittest.TestCase):
+    def _makeOne(self, *args, **kwargs):
+        from kazoo.security import make_acl
+        return make_acl(*args, **kwargs)
+
+    def test_read_acl(self):
+        acl = self._makeOne("digest", ":", read=True)
+        eq_(acl.perms & Permissions.READ, Permissions.READ)
+
+    def test_all_perms(self):
+        acl = self._makeOne("digest", ":", read=True, write=True,
+                            create=True, delete=True, admin=True)
+        for perm in [Permissions.READ, Permissions.CREATE, Permissions.WRITE,
+                     Permissions.DELETE, Permissions.ADMIN]:
+            eq_(acl.perms & perm, perm)
+
+    def test_perm_listing(self):
+        from kazoo.security import ACL
+        f = ACL(15, 'fred')
+        self.assert_('READ' in f.acl_list)
+        self.assert_('WRITE' in f.acl_list)
+        self.assert_('CREATE' in f.acl_list)
+        self.assert_('DELETE' in f.acl_list)
+
+        f = ACL(16, 'fred')
+        self.assert_('ADMIN' in f.acl_list)
+
+        f = ACL(31, 'george')
+        self.assert_('ALL' in f.acl_list)
+
+    def test_perm_repr(self):
+        from kazoo.security import ACL
+        f = ACL(16, 'fred')
+        self.assert_("ACL(perms=16, acl_list=['ADMIN']" in repr(f))
diff --git a/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py b/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py
new file mode 100644
index 0000000..4de5781
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_threading_handler.py
@@ -0,0 +1,327 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import threading
+import unittest
+
+import mock
+from nose.tools import assert_raises
+from nose.tools import eq_
+from nose.tools import raises
+
+
+class TestThreadingHandler(unittest.TestCase):
+    def _makeOne(self, *args):
+        from kazoo.handlers.threading import SequentialThreadingHandler
+        return SequentialThreadingHandler(*args)
+
+    def _getAsync(self, *args):
+        from kazoo.handlers.threading import AsyncResult
+        return AsyncResult
+
+    def test_proper_threading(self):
+        h = self._makeOne()
+        h.start()
+        # In Python 3.3 _Event is gone, before Event is function
+        event_class = getattr(threading, '_Event', threading.Event)
+        assert isinstance(h.event_object(), event_class)
+
+    def test_matching_async(self):
+        h = self._makeOne()
+        h.start()
+        async = self._getAsync()
+        assert isinstance(h.async_result(), async)
+
+    def test_exception_raising(self):
+        h = self._makeOne()
+
+        @raises(h.timeout_exception)
+        def testit():
+            raise h.timeout_exception("This is a timeout")
+        testit()
+
+    def test_double_start_stop(self):
+        h = self._makeOne()
+        h.start()
+        self.assertTrue(h._running)
+        h.start()
+        h.stop()
+        h.stop()
+        self.assertFalse(h._running)
+
+
+class TestThreadingAsync(unittest.TestCase):
+    def _makeOne(self, *args):
+        from kazoo.handlers.threading import AsyncResult
+        return AsyncResult(*args)
+
+    def _makeHandler(self):
+        from kazoo.handlers.threading import SequentialThreadingHandler
+        return SequentialThreadingHandler()
+
+    def test_ready(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        eq_(async.ready(), False)
+        async.set('val')
+        eq_(async.ready(), True)
+        eq_(async.successful(), True)
+        eq_(async.exception, None)
+
+    def test_callback_queued(self):
+        mock_handler = mock.Mock()
+        mock_handler.completion_queue = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        async.rawlink(lambda a: a)
+        async.set('val')
+
+        assert mock_handler.completion_queue.put.called
+
+    def test_set_exception(self):
+        mock_handler = mock.Mock()
+        mock_handler.completion_queue = mock.Mock()
+        async = self._makeOne(mock_handler)
+        async.rawlink(lambda a: a)
+        async.set_exception(ImportError('Error occured'))
+
+        assert isinstance(async.exception, ImportError)
+        assert mock_handler.completion_queue.put.called
+
+    def test_get_wait_while_setting(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+        bv = threading.Event()
+        cv = threading.Event()
+
+        def wait_for_val():
+            bv.set()
+            val = async.get()
+            lst.append(val)
+            cv.set()
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+        bv.wait()
+
+        async.set('fred')
+        cv.wait()
+        eq_(lst, ['fred'])
+        th.join()
+
+    def test_get_with_nowait(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+        timeout = self._makeHandler().timeout_exception
+
+        @raises(timeout)
+        def test_it():
+            async.get(block=False)
+        test_it()
+
+        @raises(timeout)
+        def test_nowait():
+            async.get_nowait()
+        test_nowait()
+
+    def test_get_with_exception(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+        bv = threading.Event()
+        cv = threading.Event()
+
+        def wait_for_val():
+            bv.set()
+            try:
+                val = async.get()
+            except ImportError:
+                lst.append('oops')
+            else:
+                lst.append(val)
+            cv.set()
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+        bv.wait()
+
+        async.set_exception(ImportError)
+        cv.wait()
+        eq_(lst, ['oops'])
+        th.join()
+
+    def test_wait(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+        bv = threading.Event()
+        cv = threading.Event()
+
+        def wait_for_val():
+            bv.set()
+            try:
+                val = async.wait(10)
+            except ImportError:
+                lst.append('oops')
+            else:
+                lst.append(val)
+            cv.set()
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+        bv.wait(10)
+
+        async.set("fred")
+        cv.wait(15)
+        eq_(lst, [True])
+        th.join()
+
+    def test_set_before_wait(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+        cv = threading.Event()
+        async.set('fred')
+
+        def wait_for_val():
+            val = async.get()
+            lst.append(val)
+            cv.set()
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+        cv.wait()
+        eq_(lst, ['fred'])
+        th.join()
+
+    def test_set_exc_before_wait(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+        cv = threading.Event()
+        async.set_exception(ImportError)
+
+        def wait_for_val():
+            try:
+                val = async.get()
+            except ImportError:
+                lst.append('ooops')
+            else:
+                lst.append(val)
+            cv.set()
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+        cv.wait()
+        eq_(lst, ['ooops'])
+        th.join()
+
+    def test_linkage(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+        cv = threading.Event()
+
+        lst = []
+
+        def add_on():
+            lst.append(True)
+
+        def wait_for_val():
+            async.get()
+            cv.set()
+
+        th = threading.Thread(target=wait_for_val)
+        th.start()
+
+        async.rawlink(add_on)
+        async.set('fred')
+        assert mock_handler.completion_queue.put.called
+        async.unlink(add_on)
+        cv.wait()
+        eq_(async.value, 'fred')
+        th.join()
+
+    def test_linkage_not_ready(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+
+        def add_on():
+            lst.append(True)
+
+        async.set('fred')
+        assert not mock_handler.completion_queue.called
+        async.rawlink(add_on)
+        assert mock_handler.completion_queue.put.called
+
+    def test_link_and_unlink(self):
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+
+        def add_on():
+            lst.append(True)
+
+        async.rawlink(add_on)
+        assert not mock_handler.completion_queue.put.called
+        async.unlink(add_on)
+        async.set('fred')
+        assert not mock_handler.completion_queue.put.called
+
+    def test_captured_exception(self):
+        from kazoo.handlers.utils import capture_exceptions
+
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        @capture_exceptions(async)
+        def exceptional_function():
+            return 1/0
+
+        exceptional_function()
+
+        assert_raises(ZeroDivisionError, async.get)
+
+    def test_no_capture_exceptions(self):
+        from kazoo.handlers.utils import capture_exceptions
+
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+
+        def add_on():
+            lst.append(True)
+
+        async.rawlink(add_on)
+
+        @capture_exceptions(async)
+        def regular_function():
+            return True
+
+        regular_function()
+
+        assert not mock_handler.completion_queue.put.called
+
+    def test_wraps(self):
+        from kazoo.handlers.utils import wrap
+
+        mock_handler = mock.Mock()
+        async = self._makeOne(mock_handler)
+
+        lst = []
+
+        def add_on(result):
+            lst.append(result.get())
+
+        async.rawlink(add_on)
+
+        @wrap(async)
+        def regular_function():
+            return 'hello'
+
+        assert regular_function() == 'hello'
+        assert mock_handler.completion_queue.put.called
+        assert async.get() == 'hello'
diff --git a/slider-agent/src/main/python/kazoo/tests/test_watchers.py b/slider-agent/src/main/python/kazoo/tests/test_watchers.py
new file mode 100644
index 0000000..44795c4
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/test_watchers.py
@@ -0,0 +1,490 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+import time
+import threading
+import uuid
+
+from nose.tools import eq_
+from nose.tools import raises
+
+from kazoo.exceptions import KazooException
+from kazoo.protocol.states import EventType
+from kazoo.testing import KazooTestCase
+
+
+class KazooDataWatcherTests(KazooTestCase):
+    def setUp(self):
+        super(KazooDataWatcherTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+        self.client.ensure_path(self.path)
+
+    def test_data_watcher(self):
+        update = threading.Event()
+        data = [True]
+
+        # Make it a non-existent path
+        self.path += 'f'
+
+        @self.client.DataWatch(self.path)
+        def changed(d, stat):
+            data.pop()
+            data.append(d)
+            update.set()
+
+        update.wait(10)
+        eq_(data, [None])
+        update.clear()
+
+        self.client.create(self.path, b'fred')
+        update.wait(10)
+        eq_(data[0], b'fred')
+        update.clear()
+
+    def test_data_watcher_once(self):
+        update = threading.Event()
+        data = [True]
+
+        # Make it a non-existent path
+        self.path += 'f'
+
+        dwatcher = self.client.DataWatch(self.path)
+
+        @dwatcher
+        def changed(d, stat):
+            data.pop()
+            data.append(d)
+            update.set()
+
+        update.wait(10)
+        eq_(data, [None])
+        update.clear()
+
+        @raises(KazooException)
+        def test_it():
+            @dwatcher
+            def func(d, stat):
+                data.pop()
+        test_it()
+
+    def test_data_watcher_with_event(self):
+        # Test that the data watcher gets passed the event, if it
+        # accepts three arguments
+        update = threading.Event()
+        data = [True]
+
+        # Make it a non-existent path
+        self.path += 'f'
+
+        @self.client.DataWatch(self.path)
+        def changed(d, stat, event):
+            data.pop()
+            data.append(event)
+            update.set()
+
+        update.wait(10)
+        eq_(data, [None])
+        update.clear()
+
+        self.client.create(self.path, b'fred')
+        update.wait(10)
+        eq_(data[0].type, EventType.CREATED)
+        update.clear()
+
+    def test_func_style_data_watch(self):
+        update = threading.Event()
+        data = [True]
+
+        # Make it a non-existent path
+        path = self.path + 'f'
+
+        def changed(d, stat):
+            data.pop()
+            data.append(d)
+            update.set()
+        self.client.DataWatch(path, changed)
+
+        update.wait(10)
+        eq_(data, [None])
+        update.clear()
+
+        self.client.create(path, b'fred')
+        update.wait(10)
+        eq_(data[0], b'fred')
+        update.clear()
+
+    def test_datawatch_across_session_expire(self):
+        update = threading.Event()
+        data = [True]
+
+        @self.client.DataWatch(self.path)
+        def changed(d, stat):
+            data.pop()
+            data.append(d)
+            update.set()
+
+        update.wait(10)
+        eq_(data, [b""])
+        update.clear()
+
+        self.expire_session()
+        self.client.retry(self.client.set, self.path, b'fred')
+        update.wait(25)
+        eq_(data[0], b'fred')
+
+    def test_func_stops(self):
+        update = threading.Event()
+        data = [True]
+
+        self.path += "f"
+
+        fail_through = []
+
+        @self.client.DataWatch(self.path)
+        def changed(d, stat):
+            data.pop()
+            data.append(d)
+            update.set()
+            if fail_through:
+                return False
+
+        update.wait(10)
+        eq_(data, [None])
+        update.clear()
+
+        fail_through.append(True)
+        self.client.create(self.path, b'fred')
+        update.wait(10)
+        eq_(data[0], b'fred')
+        update.clear()
+
+        self.client.set(self.path, b'asdfasdf')
+        update.wait(0.2)
+        eq_(data[0], b'fred')
+
+        d, stat = self.client.get(self.path)
+        eq_(d, b'asdfasdf')
+
+    def test_no_such_node(self):
+        args = []
+
+        @self.client.DataWatch("/some/path")
+        def changed(d, stat):
+            args.extend([d, stat])
+
+        eq_(args, [None, None])
+
+    def test_bad_watch_func2(self):
+        counter = 0
+
+        @self.client.DataWatch(self.path)
+        def changed(d, stat):
+            if counter > 0:
+                raise Exception("oops")
+
+        raises(Exception)(changed)
+
+        counter += 1
+        self.client.set(self.path, b'asdfasdf')
+
+    def test_watcher_evaluating_to_false(self):
+        class WeirdWatcher(list):
+            def __call__(self, *args):
+                self.called = True
+        watcher = WeirdWatcher()
+        self.client.DataWatch(self.path, watcher)
+        self.client.set(self.path, b'mwahaha')
+        self.assertTrue(watcher.called)
+
+    def test_watcher_repeat_delete(self):
+        a = []
+        ev = threading.Event()
+
+        self.client.delete(self.path)
+
+        @self.client.DataWatch(self.path)
+        def changed(val, stat):
+            a.append(val)
+            ev.set()
+
+        eq_(a, [None])
+        ev.wait(10)
+        ev.clear()
+        self.client.create(self.path, b'blah')
+        ev.wait(10)
+        eq_(ev.is_set(), True)
+        ev.clear()
+        eq_(a, [None, b'blah'])
+        self.client.delete(self.path)
+        ev.wait(10)
+        eq_(ev.is_set(), True)
+        ev.clear()
+        eq_(a, [None, b'blah', None])
+        self.client.create(self.path, b'blah')
+        ev.wait(10)
+        eq_(ev.is_set(), True)
+        ev.clear()
+        eq_(a, [None, b'blah', None, b'blah'])
+
+    def test_watcher_with_closing(self):
+        a = []
+        ev = threading.Event()
+
+        self.client.delete(self.path)
+
+        @self.client.DataWatch(self.path)
+        def changed(val, stat):
+            a.append(val)
+            ev.set()
+        eq_(a, [None])
+
+        b = False
+        try:
+            self.client.stop()
+        except:
+            b = True
+        eq_(b, False)
+
+
+class KazooChildrenWatcherTests(KazooTestCase):
+    def setUp(self):
+        super(KazooChildrenWatcherTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+        self.client.ensure_path(self.path)
+
+    def test_child_watcher(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        @self.client.ChildrenWatch(self.path)
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(all_children, ['smith'])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'george')
+        update.wait(10)
+        eq_(sorted(all_children), ['george', 'smith'])
+
+    def test_child_watcher_once(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        cwatch = self.client.ChildrenWatch(self.path)
+
+        @cwatch
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        @raises(KazooException)
+        def test_it():
+            @cwatch
+            def changed_again(children):
+                update.set()
+        test_it()
+
+    def test_child_watcher_with_event(self):
+        update = threading.Event()
+        events = [True]
+
+        @self.client.ChildrenWatch(self.path, send_event=True)
+        def changed(children, event):
+            events.pop()
+            events.append(event)
+            update.set()
+
+        update.wait(10)
+        eq_(events, [None])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(events[0].type, EventType.CHILD)
+        update.clear()
+
+    def test_func_style_child_watcher(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+
+        self.client.ChildrenWatch(self.path, changed)
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(all_children, ['smith'])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'george')
+        update.wait(10)
+        eq_(sorted(all_children), ['george', 'smith'])
+
+    def test_func_stops(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        fail_through = []
+
+        @self.client.ChildrenWatch(self.path)
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+            if fail_through:
+                return False
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        fail_through.append(True)
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(all_children, ['smith'])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'george')
+        update.wait(0.5)
+        eq_(all_children, ['smith'])
+
+    def test_child_watch_session_loss(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        @self.client.ChildrenWatch(self.path)
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(all_children, ['smith'])
+        update.clear()
+        self.expire_session()
+
+        self.client.retry(self.client.create,
+                          self.path + '/' + 'george')
+        update.wait(20)
+        eq_(sorted(all_children), ['george', 'smith'])
+
+    def test_child_stop_on_session_loss(self):
+        update = threading.Event()
+        all_children = ['fred']
+
+        @self.client.ChildrenWatch(self.path, allow_session_lost=False)
+        def changed(children):
+            while all_children:
+                all_children.pop()
+            all_children.extend(children)
+            update.set()
+
+        update.wait(10)
+        eq_(all_children, [])
+        update.clear()
+
+        self.client.create(self.path + '/' + 'smith')
+        update.wait(10)
+        eq_(all_children, ['smith'])
+        update.clear()
+        self.expire_session()
+
+        self.client.retry(self.client.create,
+                          self.path + '/' + 'george')
+        update.wait(4)
+        eq_(update.is_set(), False)
+        eq_(all_children, ['smith'])
+
+        children = self.client.get_children(self.path)
+        eq_(sorted(children), ['george', 'smith'])
+
+    def test_bad_children_watch_func(self):
+        counter = 0
+
+        @self.client.ChildrenWatch(self.path)
+        def changed(children):
+            if counter > 0:
+                raise Exception("oops")
+
+        raises(Exception)(changed)
+        counter += 1
+        self.client.create(self.path + '/' + 'smith')
+
+
+class KazooPatientChildrenWatcherTests(KazooTestCase):
+    def setUp(self):
+        super(KazooPatientChildrenWatcherTests, self).setUp()
+        self.path = "/" + uuid.uuid4().hex
+
+    def _makeOne(self, *args, **kwargs):
+        from kazoo.recipe.watchers import PatientChildrenWatch
+        return PatientChildrenWatch(*args, **kwargs)
+
+    def test_watch(self):
+        self.client.ensure_path(self.path)
+        watcher = self._makeOne(self.client, self.path, 0.1)
+        result = watcher.start()
+        children, asy = result.get()
+        eq_(len(children), 0)
+        eq_(asy.ready(), False)
+
+        self.client.create(self.path + '/' + 'fred')
+        asy.get(timeout=1)
+        eq_(asy.ready(), True)
+
+    def test_exception(self):
+        from kazoo.exceptions import NoNodeError
+        watcher = self._makeOne(self.client, self.path, 0.1)
+        result = watcher.start()
+
+        @raises(NoNodeError)
+        def testit():
+            result.get()
+        testit()
+
+    def test_watch_iterations(self):
+        self.client.ensure_path(self.path)
+        watcher = self._makeOne(self.client, self.path, 0.5)
+        result = watcher.start()
+        eq_(result.ready(), False)
+
+        time.sleep(0.08)
+        self.client.create(self.path + '/' + uuid.uuid4().hex)
+        eq_(result.ready(), False)
+        time.sleep(0.08)
+        eq_(result.ready(), False)
+        self.client.create(self.path + '/' + uuid.uuid4().hex)
+        time.sleep(0.08)
+        eq_(result.ready(), False)
+
+        children, asy = result.get()
+        eq_(len(children), 2)
diff --git a/slider-agent/src/main/python/kazoo/tests/util.py b/slider-agent/src/main/python/kazoo/tests/util.py
new file mode 100644
index 0000000..906cbc0
--- /dev/null
+++ b/slider-agent/src/main/python/kazoo/tests/util.py
@@ -0,0 +1,127 @@
+"""license: Apache License 2.0, see LICENSE for more details."""
+##############################################################################
+#
+# Copyright Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import logging
+import os
+import time
+
+TRAVIS = os.environ.get('TRAVIS', False)
+TRAVIS_ZK_VERSION = TRAVIS and os.environ.get('ZOOKEEPER_VERSION', None)
+if TRAVIS_ZK_VERSION:
+    TRAVIS_ZK_VERSION = tuple([int(n) for n in TRAVIS_ZK_VERSION.split('.')])
+
+
+class Handler(logging.Handler):
+
+    def __init__(self, *names, **kw):
+        logging.Handler.__init__(self)
+        self.names = names
+        self.records = []
+        self.setLoggerLevel(**kw)
+
+    def setLoggerLevel(self, level=1):
+        self.level = level
+        self.oldlevels = {}
+
+    def emit(self, record):
+        self.records.append(record)
+
+    def clear(self):
+        del self.records[:]
+
+    def install(self):
+        for name in self.names:
+            logger = logging.getLogger(name)
+            self.oldlevels[name] = logger.level
+            logger.setLevel(self.level)
+            logger.addHandler(self)
+
+    def uninstall(self):
+        for name in self.names:
+            logger = logging.getLogger(name)
+            logger.setLevel(self.oldlevels[name])
+            logger.removeHandler(self)
+
+    def __str__(self):
+        return '\n'.join(
+            [("%s %s\n  %s" %
+              (record.name, record.levelname,
+               '\n'.join([line
+                          for line in record.getMessage().split('\n')
+                          if line.strip()])
+               )
+              )
+              for record in self.records]
+              )
+
+
+class InstalledHandler(Handler):
+
+    def __init__(self, *names, **kw):
+        Handler.__init__(self, *names, **kw)
+        self.install()
+
+
+class Wait(object):
+
+    class TimeOutWaitingFor(Exception):
+        "A test condition timed out"
+
+    timeout = 9
+    wait = .01
+
+    def __init__(self, timeout=None, wait=None, exception=None,
+                 getnow=(lambda: time.time), getsleep=(lambda: time.sleep)):
+
+        if timeout is not None:
+            self.timeout = timeout
+
+        if wait is not None:
+            self.wait = wait
+
+        if exception is not None:
+            self.TimeOutWaitingFor = exception
+
+        self.getnow = getnow
+        self.getsleep = getsleep
+
+    def __call__(self, func=None, timeout=None, wait=None, message=None):
+        if func is None:
+            return lambda func: self(func, timeout, wait, message)
+
+        if func():
+            return
+
+        now = self.getnow()
+        sleep = self.getsleep()
+        if timeout is None:
+            timeout = self.timeout
+        if wait is None:
+            wait = self.wait
+        wait = float(wait)
+
+        deadline = now() + timeout
+        while 1:
+            sleep(wait)
+            if func():
+                return
+            if now() > deadline:
+                raise self.TimeOutWaitingFor(
+                    message or
+                    getattr(func, '__doc__') or
+                    getattr(func, '__name__')
+                    )
+
+wait = Wait()
diff --git a/slider-agent/src/main/python/resource_management/core/logger.py b/slider-agent/src/main/python/resource_management/core/logger.py
index 7370c97..b80042a 100644
--- a/slider-agent/src/main/python/resource_management/core/logger.py
+++ b/slider-agent/src/main/python/resource_management/core/logger.py
@@ -79,7 +79,10 @@
         val = "[EMPTY]"
       # correctly output 'mode' (as they are octal values like 0755)
       elif y and x == 'mode':
-        val = oct(y)
+        try:
+          val = oct(y)
+        except:
+          val = repr(y)
       else:
         val = repr(y)
       
diff --git a/slider-agent/src/main/python/resource_management/core/providers/__init__.py b/slider-agent/src/main/python/resource_management/core/providers/__init__.py
index 0c170e7..630183b 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/__init__.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/__init__.py
@@ -50,6 +50,11 @@
   debian=dict(
     Package="resource_management.core.providers.package.apt.AptProvider",
   ),
+  winsrv=dict(
+    Service="resource_management.core.providers.windows.service.ServiceProvider",
+    Execute="resource_management.core.providers.windows.system.ExecuteProvider",
+    File="resource_management.core.providers.windows.system.FileProvider"
+  ),
   default=dict(
     File="resource_management.core.providers.system.FileProvider",
     Directory="resource_management.core.providers.system.DirectoryProvider",
diff --git a/slider-agent/src/main/python/resource_management/core/providers/accounts.py b/slider-agent/src/main/python/resource_management/core/providers/accounts.py
index 747f120..8711e45 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/accounts.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/accounts.py
@@ -22,8 +22,8 @@
 
 from __future__ import with_statement
 
-import grp
-import pwd
+#import grp
+#import pwd
 from resource_management.core import shell
 from resource_management.core.providers import Provider
 from resource_management.core.logger import Logger
@@ -70,10 +70,10 @@
 
   @property
   def user(self):
-    try:
-      return pwd.getpwnam(self.resource.username)
-    except KeyError:
-      return None
+    #try:
+    #  return pwd.getpwnam(self.resource.username)
+    #except KeyError:
+    return None
 
 
 class GroupProvider(Provider):
@@ -110,7 +110,7 @@
 
   @property
   def group(self):
-    try:
-      return grp.getgrnam(self.resource.group_name)
-    except KeyError:
-      return None
+    #try:
+    #  return grp.getgrnam(self.resource.group_name)
+    #except KeyError:
+    return None
diff --git a/slider-agent/src/main/python/resource_management/core/providers/system.py b/slider-agent/src/main/python/resource_management/core/providers/system.py
index 3475d6a..6969c62 100644
--- a/slider-agent/src/main/python/resource_management/core/providers/system.py
+++ b/slider-agent/src/main/python/resource_management/core/providers/system.py
@@ -22,9 +22,8 @@
 
 from __future__ import with_statement
 
-import grp
+import platform
 import os
-import pwd
 import time
 import shutil
 from resource_management.core import shell
@@ -33,14 +32,22 @@
 from resource_management.core.providers import Provider
 from resource_management.core.logger import Logger
 
+IS_WINDOWS = platform.system() == "Windows"
+
+if not IS_WINDOWS:
+  import grp
+  import pwd
 
 def _coerce_uid(user):
   try:
     uid = int(user)
   except ValueError:
-    try:
-      uid = pwd.getpwnam(user).pw_uid
-    except KeyError:
+    if not IS_WINDOWS:
+      try:
+        uid = pwd.getpwnam(user).pw_uid
+      except KeyError:
+        raise Fail("User %s doesn't exist." % user)
+    else:
       raise Fail("User %s doesn't exist." % user)
   return uid
 
@@ -49,10 +56,13 @@
   try:
     gid = int(group)
   except ValueError:
-    try:
-      gid = grp.getgrnam(group).gr_gid
-    except KeyError:
-      raise Fail("Group %s doesn't exist." % group)
+    if not IS_WINDOWS:
+      try:
+        gid = grp.getgrnam(group).gr_gid
+      except KeyError:
+        raise Fail("Group %s doesn't exist." % group)
+    else:
+      raise Fail("Group %s doesn't exist." % group)
   return gid
 
 
@@ -232,7 +242,8 @@
         shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                             cwd=self.resource.cwd, env=self.resource.environment,
                             preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
-                            wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout)
+                            wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,
+                            pid_file=self.resource.pid_file)
         break
       except Fail as ex:
         if i == self.resource.tries-1: # last try
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py b/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py
new file mode 100644
index 0000000..49fddbd
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/__init__.py
@@ -0,0 +1,20 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
\ No newline at end of file
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/service.py b/slider-agent/src/main/python/resource_management/core/providers/windows/service.py
new file mode 100644
index 0000000..4e73a2d
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/service.py
@@ -0,0 +1,65 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
+from resource_management.core.providers import Provider
+from resource_management.core.base import Fail
+import win32service
+import time
+
+
+_schSCManager = win32service.OpenSCManager(None, None, win32service.SC_MANAGER_ALL_ACCESS)
+
+
+class ServiceProvider(Provider):
+  def action_start(self):
+    self._service_handle = self._service_handle if hasattr(self, "_service_handle") else \
+      win32service.OpenService(_schSCManager, self.resource.service_name, win32service.SERVICE_ALL_ACCESS)
+    if not self.status():
+      win32service.StartService(self._service_handle, None)
+      self.wait_status(win32service.SERVICE_RUNNING)
+
+  def action_stop(self):
+    self._service_handle = self._service_handle if hasattr(self, "_service_handle") else \
+      win32service.OpenService(_schSCManager, self.resource.service_name, win32service.SERVICE_ALL_ACCESS)
+    if self.status():
+      win32service.ControlService(self._service_handle, win32service.SERVICE_CONTROL_STOP)
+      self.wait_status(win32service.SERVICE_STOPPED)
+
+  def action_restart(self):
+    self._service_handle = win32service.OpenService(_schSCManager, self.resource.service_name,
+                                                    win32service.SERVICE_ALL_ACCESS)
+    self.action_stop()
+    self.action_start()
+
+  def action_reload(self):
+    raise Fail("Reload for Service resource not supported on windows")
+
+  def status(self):
+    if win32service.QueryServiceStatusEx(self._service_handle)["CurrentState"] == win32service.SERVICE_RUNNING:
+      return True
+    return False
+
+  def get_current_status(self):
+    return win32service.QueryServiceStatusEx(self._service_handle)["CurrentState"]
+
+  def wait_status(self, status, timeout=5):
+    begin = time.time()
+    while self.get_current_status() != status and (timeout == 0 or time.time() - begin < timeout):
+      time.sleep(1)
\ No newline at end of file
diff --git a/slider-agent/src/main/python/resource_management/core/providers/windows/system.py b/slider-agent/src/main/python/resource_management/core/providers/windows/system.py
new file mode 100644
index 0000000..f0d4825
--- /dev/null
+++ b/slider-agent/src/main/python/resource_management/core/providers/windows/system.py
@@ -0,0 +1,195 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Slider Agent
+
+"""
+
+from resource_management.core.providers import Provider
+from resource_management.core.logger import Logger
+from resource_management.core.base import Fail
+from resource_management.core import ExecuteTimeoutException
+from multiprocessing import Queue
+import time
+import os
+import subprocess
+import shutil
+
+
+def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True, timeout=None, pid_file_name=None):
+  # TODO implement logoutput
+  Logger.info("Executing %s" % (command))
+  proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                          cwd=cwd, env=env, shell=False)
+  if not wait_for_finish:
+    if pid_file_name:
+      pidfile = open(pid_file_name, 'w')
+      pidfile.write(str(proc.pid))
+      pidfile.close()
+    return None, None
+
+  if timeout:
+    import threading
+    t = threading.Timer(timeout, proc.terminate)
+    t.start()
+
+  out = proc.communicate()[0].strip()
+  code = proc.returncode
+  if logoutput and out:
+    Logger.info(out)
+  return code, out
+
+# see msdn Icacls doc for rights
+def _set_file_acl(file, user, rights):
+  acls_modify_cmd = "icacls {0} /grant {1}:{2}".format(file, user, rights)
+  acls_remove_cmd = "icacls {0} /remove {1}".format(file, user)
+  code, out = _call_command(acls_remove_cmd)
+  if code != 0:
+    raise Fail("Can not remove rights for path {0} and user {1}".format(file, user))
+  code, out = _call_command(acls_modify_cmd)
+  if code != 0:
+    raise Fail("Can not set rights {0} for path {1} and user {2}".format(file, user))
+  else:
+    return
+
+class FileProvider(Provider):
+  def action_create(self):
+    path = self.resource.path
+
+    if os.path.isdir(path):
+      raise Fail("Applying %s failed, directory with name %s exists" % (self.resource, path))
+
+    dirname = os.path.dirname(path)
+    if not os.path.isdir(dirname):
+      raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))
+
+    write = False
+    content = self._get_content()
+    if not os.path.exists(path):
+      write = True
+      reason = "it doesn't exist"
+    elif self.resource.replace:
+      if content is not None:
+        with open(path, "rb") as fp:
+          old_content = fp.read()
+        if content != old_content:
+          write = True
+          reason = "contents don't match"
+          if self.resource.backup:
+            self.resource.env.backup_file(path)
+
+    if write:
+      Logger.info("Writing %s because %s" % (self.resource, reason))
+      with open(path, "wb") as fp:
+        if content:
+          fp.write(content)
+
+    if self.resource.owner and self.resource.mode:
+      _set_file_acl(self.resource.path, self.resource.owner, self.resource.mode)
+
+  def action_delete(self):
+    path = self.resource.path
+
+    if os.path.isdir(path):
+      raise Fail("Applying %s failed, %s is directory not file!" % (self.resource, path))
+
+    if os.path.exists(path):
+      Logger.info("Deleting %s" % self.resource)
+      os.unlink(path)
+
+  def _get_content(self):
+    content = self.resource.content
+    if content is None:
+      return None
+    elif isinstance(content, basestring):
+      return content
+    elif hasattr(content, "__call__"):
+      return content()
+    raise Fail("Unknown source type for %s: %r" % (self, content))
+
+class ExecuteProvider(Provider):
+  def action_run(self):
+    if self.resource.creates:
+      if os.path.exists(self.resource.creates):
+        return
+
+    Logger.debug("Executing %s" % self.resource)
+
+    if self.resource.path != []:
+      if not self.resource.environment:
+        self.resource.environment = {}
+
+      self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)
+
+    for i in range(0, self.resource.tries):
+      try:
+        _call_command(self.resource.command, logoutput=self.resource.logoutput,
+                      cwd=self.resource.cwd, env=self.resource.environment,
+                      wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,
+                      pid_file_name=self.resource.pid_file)
+        break
+      except Fail as ex:
+        if i == self.resource.tries - 1:  # last try
+          raise ex
+        else:
+          Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
+          time.sleep(self.resource.try_sleep)
+      except ExecuteTimeoutException:
+        err_msg = ("Execution of '%s' was killed due timeout after %d seconds") % (
+          self.resource.command, self.resource.timeout)
+
+        if self.resource.on_timeout:
+          Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
+          _call_command(self.resource.on_timeout)
+        else:
+          raise Fail(err_msg)
+
+
+class DirectoryProvider(Provider):
+  def action_create(self):
+    path = DirectoryProvider._trim_uri(self.resource.path)
+    if not os.path.exists(path):
+      Logger.info("Creating directory %s" % self.resource)
+      if self.resource.recursive:
+        os.makedirs(path)
+      else:
+        dirname = os.path.dirname(path)
+        if not os.path.isdir(dirname):
+          raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))
+
+        os.mkdir(path)
+
+    if not os.path.isdir(path):
+      raise Fail("Applying %s failed, file %s already exists" % (self.resource, path))
+
+    if self.resource.owner and self.resource.mode:
+      _set_file_acl(path, self.resource.owner, self.resource.mode)
+
+  def action_delete(self):
+    path = self.resource.path
+    if os.path.exists(path):
+      if not os.path.isdir(path):
+        raise Fail("Applying %s failed, %s is not a directory" % (self.resource, path))
+
+      Logger.info("Removing directory %s and all its content" % self.resource)
+      shutil.rmtree(path)
+
+  @staticmethod
+  def _trim_uri(file_uri):
+    if file_uri.startswith("file:///"):
+      return file_uri[8:]
+    return file_uri
diff --git a/slider-agent/src/main/python/resource_management/core/resources/system.py b/slider-agent/src/main/python/resource_management/core/resources/system.py
index 2c832a4..a63d993 100644
--- a/slider-agent/src/main/python/resource_management/core/resources/system.py
+++ b/slider-agent/src/main/python/resource_management/core/resources/system.py
@@ -101,6 +101,10 @@
   - try_sleep
   """
   wait_for_finish = BooleanArgument(default=True)
+  """
+  if wait_for_finish is True then optionally the caller can ask for the pid to be written
+  """
+  pid_file = ResourceArgument()
 
 
 class ExecuteScript(Resource):
diff --git a/slider-agent/src/main/python/resource_management/core/shell.py b/slider-agent/src/main/python/resource_management/core/shell.py
index 92312d5..fb2c946 100644
--- a/slider-agent/src/main/python/resource_management/core/shell.py
+++ b/slider-agent/src/main/python/resource_management/core/shell.py
@@ -31,15 +31,15 @@
 from resource_management.core.logger import Logger
 
 def checked_call(command, logoutput=False, 
-         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
-  return _call(command, logoutput, True, cwd, env, preexec_fn, user, wait_for_finish, timeout)
+         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file=None):
+  return _call(command, logoutput, True, cwd, env, preexec_fn, user, wait_for_finish, timeout, pid_file)
 
 def call(command, logoutput=False, 
-         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
-  return _call(command, logoutput, False, cwd, env, preexec_fn, user, wait_for_finish, timeout)
+         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file=None):
+  return _call(command, logoutput, False, cwd, env, preexec_fn, user, wait_for_finish, timeout, pid_file)
             
 def _call(command, logoutput=False, throw_on_failure=True, 
-         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None):
+         cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, pid_file_name=None):
   """
   Execute shell command
   
@@ -67,6 +67,10 @@
                           preexec_fn=preexec_fn)
 
   if not wait_for_finish:
+    if pid_file_name:
+      pidfile = open(pid_file_name, 'w')
+      pidfile.write(str(proc.pid))
+      pidfile.close()
     return None, None
   
   if timeout:
diff --git a/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py b/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
index ad30707..cc0610d 100644
--- a/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
+++ b/slider-agent/src/main/python/resource_management/libraries/functions/__init__.py
@@ -28,4 +28,4 @@
 from resource_management.libraries.functions.is_empty import *
 from resource_management.libraries.functions.substitute_vars import *
 from resource_management.libraries.functions.os_check import *
-from resource_management.libraries.functions.get_port_from_url import *
\ No newline at end of file
+from resource_management.libraries.functions.get_port_from_url import *
diff --git a/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py b/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
index abfceb8..8c11d93 100644
--- a/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
+++ b/slider-agent/src/main/python/resource_management/libraries/functions/os_check.py
@@ -31,41 +31,65 @@
   PYTHON_VER = sys.version_info[0] * 10 + sys.version_info[1]
 
   if PYTHON_VER < 26:
-    linux_distribution = platform.dist()
+    (distname, version, id)  = platform.dist()
   else:
-    linux_distribution = platform.linux_distribution()
+    (distname, version, id) = platform.linux_distribution()
 
-  return linux_distribution
+  return (platform.system(), os.name, distname, version, id)
 
+def windows_distribution():
+  # Only support Windows Server 64 bit
+  (win_release, win_version, win_csd, win_ptype) = platform.win32_ver()
+  # TODO: inspect win_version to restrict to supported 64-bit Server releases
+  return (platform.system(), os.name, win_release, win_version, win_ptype)
 
 class OS_CONST_TYPE(type):
+  # os platforms
+  LINUX_OS = 'linux'
+  WINDOWS_OS = 'windows'
+
   # os families
   REDHAT_FAMILY = 'redhat'
   DEBIAN_FAMILY = 'debian'
   SUSE_FAMILY = 'suse'
+  WINSRV_FAMILY = 'winsrv'
 
   # Declare here os type mapping
   OS_FAMILY_COLLECTION = [
-    {'name': REDHAT_FAMILY,
-     'os_list':
-       ['redhat', 'fedora', 'centos', 'oraclelinux',
-        'ascendos', 'amazon', 'xenserver', 'oel', 'ovs',
-        'cloudlinux', 'slc', 'scientific', 'psbm',
-        'centos linux']
-    },
-    {'name': DEBIAN_FAMILY,
-     'os_list': ['ubuntu', 'debian']
-    },
-    {'name': SUSE_FAMILY,
-     'os_list': ['sles', 'sled', 'opensuse', 'suse']
-    }
-  ]
+                            {'name': REDHAT_FAMILY,
+                             'os_list':
+                                ['redhat', 'fedora', 'centos', 'oraclelinux',
+                                 'ascendos', 'amazon', 'xenserver', 'oel', 'ovs',
+                                 'cloudlinux', 'slc', 'scientific', 'psbm',
+                                 'centos linux']
+                             },
+                            {'name': DEBIAN_FAMILY,
+                             'os_list': ['ubuntu', 'debian']
+                             },
+                            {'name': SUSE_FAMILY,
+                             'os_list': ['sles', 'sled', 'opensuse', 'suse']
+                             }
+                           ]
+  WIN_OS_FAMILY_COLLECTION = [
+                            {'name': WINSRV_FAMILY,
+                             'os_list':
+                                ['2008Server', '2012Server']
+                             },
+                           ]
   # Would be generated from Family collection definition
   OS_COLLECTION = []
 
   def __init__(cls, name, bases, dct):
-    for item in cls.OS_FAMILY_COLLECTION:
-      cls.OS_COLLECTION += item['os_list']
+    if platform.system() == 'Windows':
+      for item in cls.WIN_OS_FAMILY_COLLECTION:
+        cls.OS_COLLECTION += item['os_list']
+    else:
+      if platform.system() == 'Mac':
+        raise Exception("MacOS not supported. Exiting...")
+      else:
+        dist = linux_distribution()
+        for item in cls.OS_FAMILY_COLLECTION:
+          cls.OS_COLLECTION += item['os_list']
 
   def __getattr__(cls, name):
     """
@@ -81,17 +105,45 @@
     else:
       raise Exception("Unknown class property '%s'" % name)
 
+def get_os_distribution():
+  if platform.system() == 'Windows':
+    dist = windows_distribution()
+  else:
+    if platform.system() == 'Darwin':
+      dist = ("Darwin", "TestOnly", "1.1.1", "1.1.1", "1.1")
+    else:
+      # Linux
+      # Read content from /etc/*-release file
+      # Full release name
+      dist = linux_distribution()
+  return dist
 
 class OSConst:
   __metaclass__ = OS_CONST_TYPE
 
 
 class OSCheck:
+  _dist = get_os_distribution()
+
+  @staticmethod
+  def get_os_os():
+    """
+    Return the lowercased platform name, e.g. "windows" or "linux"
+    ("darwin" for the Mac test stub).
+
+    Derived from the cached distribution tuple; does not re-probe the OS.
+    """
+    # _dist[0] holds the platform.system() value captured at class load time
+    # (e.g. "Linux" or "Windows"); normalize it to lowercase.
+    os_os = OSCheck._dist[0].lower()
+
+    return os_os
 
   @staticmethod
   def get_os_type():
     """
     Return values:
+    2008server, 2012server,
     redhat, fedora, centos, oraclelinux, ascendos,
     amazon, xenserver, oel, ovs, cloudlinux, slc, scientific, psbm,
     ubuntu, debian, sles, sled, opensuse, suse ... and others
@@ -100,15 +152,14 @@
     """
     # Read content from /etc/*-release file
     # Full release name
-    dist = linux_distribution()
-    operatingSystem = dist[0].lower()
+    operatingSystem  = OSCheck._dist[2].lower()
 
     # special cases
     if os.path.exists('/etc/oracle-release'):
       return 'oraclelinux'
     elif operatingSystem.startswith('suse linux enterprise server'):
       return 'sles'
-    elif operatingSystem.startswith('red hat enterprise linux server'):
+    elif operatingSystem.startswith('red hat enterprise linux'):
       return 'redhat'
 
     if operatingSystem != '':
@@ -124,11 +175,14 @@
 
     In case cannot detect raises exception( from self.get_operating_system_type() ).
     """
-    os_family = OSCheck.get_os_type()
-    for os_family_item in OSConst.OS_FAMILY_COLLECTION:
-      if os_family in os_family_item['os_list']:
-        os_family = os_family_item['name']
-        break
+    if(OSCheck._dist[0] == 'Windows'):
+      os_family = OSConst.WIN_OS_FAMILY_COLLECTION[0]['name']
+    else:
+      os_family = OSCheck.get_os_type()
+      for os_family_item in OSConst.OS_FAMILY_COLLECTION:
+        if os_family in os_family_item['os_list']:
+          os_family = os_family_item['name']
+          break
 
     return os_family.lower()
 
@@ -139,15 +193,12 @@
 
     In case cannot detect raises exception.
     """
-    # Read content from /etc/*-release file
-    # Full release name
-    dist = linux_distribution()
-    dist = dist[1]
+    dist = OSCheck._dist[3]
 
     if dist:
       return dist
     else:
-      raise Exception("Cannot detect os version. Exiting...")
+      raise Exception("Cannot detect os version from " + repr(OSCheck._dist) + " Exiting...")
 
   @staticmethod
   def get_os_major_version():
@@ -165,8 +216,7 @@
 
     In case cannot detect raises exception.
     """
-    dist = linux_distribution()
-    dist = dist[2].lower()
+    dist = OSCheck._dist[4].lower()
 
     if dist:
       return dist
@@ -217,6 +267,54 @@
       pass
     return False
 
+  @staticmethod
+  def is_windows_family():
+    """
+     Return True if the OS belongs to the Windows Server family, False otherwise.
+
+     This is a safe check; it does not raise exceptions.
+    """
+    try:
+      if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
+        return True
+    except Exception:
+      pass
+    return False
+
+  @staticmethod
+  def is_linux_os():
+    """
+     Return True if the detected OS platform is Linux, False otherwise.
+
+     This is a safe check; it does not raise exceptions.
+    """
+    try:
+      if OSCheck.get_os_os() == OSConst.LINUX_OS:
+        return True
+    except Exception:
+      pass
+    return False
+
+  @staticmethod
+  def is_windows_os():
+    """
+     Return True if the detected OS platform is Windows, False otherwise.
+
+     This is a safe check; it does not raise exceptions.
+    """
+    try:
+      if OSCheck.get_os_os() == OSConst.WINDOWS_OS:
+        return True
+    except Exception:
+      pass
+    return False
+
+
+# OS info
+OS_VERSION = OSCheck().get_os_major_version()
+OS_TYPE = OSCheck.get_os_type()
+OS_FAMILY = OSCheck.get_os_family()
+OS_OS = OSCheck.get_os_os()
 
 if __name__ == "__main__":
   main()
diff --git a/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py b/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
index 973958b..1dfeef7 100644
--- a/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
+++ b/slider-agent/src/main/python/resource_management/libraries/providers/__init__.py
@@ -30,6 +30,8 @@
   debian=dict(
     Repository="resource_management.libraries.providers.repository.DebianRepositoryProvider",
   ),
+  winsrv=dict(
+  ),
   default=dict(
     ExecuteHadoop="resource_management.libraries.providers.execute_hadoop.ExecuteHadoopProvider",
     TemplateConfig="resource_management.libraries.providers.template_config.TemplateConfigProvider",
diff --git a/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py b/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
index 5817879..7750d25 100644
--- a/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
+++ b/slider-agent/src/main/python/resource_management/libraries/providers/monitor_webserver.py
@@ -42,7 +42,7 @@
 
   def get_serivice_params(self):
     self.system = System.get_instance()
-    if self.system.os_family == "suse":
+    if self.system.os_family in ["suse","debian"]:
       self.service_name = "apache2"
       self.httpd_conf_dir = '/etc/apache2'
     else:
diff --git a/slider-agent/src/main/python/resource_management/libraries/script/script.py b/slider-agent/src/main/python/resource_management/libraries/script/script.py
index 624d65e..00b80b4 100644
--- a/slider-agent/src/main/python/resource_management/libraries/script/script.py
+++ b/slider-agent/src/main/python/resource_management/libraries/script/script.py
@@ -24,6 +24,7 @@
 import sys
 import json
 import logging
+import shutil
 
 from resource_management.core.environment import Environment
 from resource_management.core.exceptions import Fail, ClientComponentHasNoStatus, ComponentIsNotRunning
@@ -32,6 +33,7 @@
 from resource_management.core.resources import Directory
 from resource_management.libraries.script.config_dictionary import ConfigDictionary
 from resource_management.libraries.script.repo_installer import RepoInstaller
+from resource_management.core.logger import Logger
 
 USAGE = """Usage: {0} <COMMAND> <JSON_CONFIG> <BASEDIR> <STROUTPUT> <LOGGING_LEVEL>
 
@@ -178,12 +180,23 @@
             Directory(install_location, action = "delete")
             Directory(install_location)
             Tarball(tarball, location=install_location)
+          elif type.lower() == "folder":
+            if name.startswith(os.path.sep):
+              src = name
+            else:
+              basedir = env.config.basedir
+              src = os.path.join(basedir, name)
+            dest = config['configurations']['global']['app_install_dir']
+            Directory(dest, action = "delete")
+            Logger.info("Copying from " + src + " to " + dest)
+            shutil.copytree(src, dest)
           else:
             if not repo_installed:
               RepoInstaller.install_repos(config)
               repo_installed = True
             Package(name)
-    except KeyError:
+    except KeyError, e:
+      Logger.info("Error installing packages. " + repr(e))
       pass # No reason to worry
 
     #RepoInstaller.remove_repos(config)
diff --git a/slider-agent/src/test/python/agent/TestActionQueue.py b/slider-agent/src/test/python/agent/TestActionQueue.py
index b3a840c..8071ee8 100644
--- a/slider-agent/src/test/python/agent/TestActionQueue.py
+++ b/slider-agent/src/test/python/agent/TestActionQueue.py
@@ -209,7 +209,7 @@
   @patch("traceback.print_exc")
   @patch.object(ActionQueue, "execute_command")
   @patch.object(ActionQueue, "execute_status_command")
-  def test_process_command(self, execute_status_command_mock,
+  def test_process_command2(self, execute_status_command_mock,
                            execute_command_mock, print_exc_mock):
     dummy_controller = MagicMock()
     actionQueue = ActionQueue(AgentConfig("", ""), dummy_controller)
@@ -272,6 +272,7 @@
   def test_execute_command(self, status_update_callback_mock, open_mock, json_load_mock,
                            resolve_script_path_mock):
 
+    self.assertEqual.__self__.maxDiff = None
     tempdir = tempfile.gettempdir()
     config = MagicMock()
     config.get.return_value = "something"
@@ -342,7 +343,8 @@
                 'role': u'HBASE_MASTER',
                 'actionId': '1-1',
                 'taskId': 3,
-                'exitcode': 777}
+                'exitcode': 777,
+                'reportResult': True}
     self.assertEqual(report['reports'][0], expected)
     # Continue command execution
     unfreeze_flag.set()
@@ -365,7 +367,8 @@
                 'structuredOut': '',
                 'exitcode': 0,
                 'allocatedPorts': {},
-                'folders': {'AGENT_LOG_ROOT': tempdir, 'AGENT_WORK_ROOT': tempdir}}
+                'folders': {'AGENT_LOG_ROOT': tempdir, 'AGENT_WORK_ROOT': tempdir},
+                'reportResult': True}
     self.assertEqual(len(report['reports']), 1)
     self.assertEqual(report['reports'][0], expected)
     self.assertTrue(os.path.isfile(configname))
@@ -403,7 +406,8 @@
                 'actionId': '1-1',
                 'taskId': 3,
                 'structuredOut': '',
-                'exitcode': 13}
+                'exitcode': 13,
+                'reportResult': True}
     self.assertEqual(len(report['reports']), 1)
     self.assertEqual(report['reports'][0], expected)
 
diff --git a/slider-agent/src/test/python/agent/TestCommandStatusDict.py b/slider-agent/src/test/python/agent/TestCommandStatusDict.py
new file mode 100644
index 0000000..ee91de6
--- /dev/null
+++ b/slider-agent/src/test/python/agent/TestCommandStatusDict.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import StringIO
+import ssl
+import unittest, threading
+import sys
+from CommandStatusDict import CommandStatusDict
+from mock.mock import patch, MagicMock, call, Mock
+import logging
+from threading import Event
+
+class TestCommandStatusDict(unittest.TestCase):
+
+  logger = logging.getLogger()
+
+  auto_hbase_install_command = {
+    'commandType': 'EXECUTION_COMMAND',
+    'role': u'HBASE',
+    'roleCommand': u'INSTALL',
+    'commandId': '1-1',
+    'taskId': 7,
+    "componentName": "HBASE_MASTER",
+    'clusterName': u'cc',
+    'serviceName': u'HDFS',
+    'auto_generated': True
+  }
+
+  @patch("__builtin__.open")
+  def test_generate_progress_report(self, open_mock):
+    csd = CommandStatusDict(None)
+    report = {}
+    report['tmpout'] = None
+    report['tmperr'] = None
+    report['structuredOut'] = None
+
+    # Make file read calls visible
+    def open_side_effect(file, mode):
+      if mode == 'r':
+        file_mock = MagicMock()
+        file_mock.read.return_value = "Read from " + str(file)
+        return file_mock
+      else:
+        return self.original_open(file, mode)
+
+    open_mock.side_effect = open_side_effect
+
+    inprogress = csd.generate_in_progress_report(self.auto_hbase_install_command, report)
+    expected = {
+      'status': 'IN_PROGRESS',
+      'stderr': 'Read from None',
+      'stdout': 'Read from None',
+      'clusterName': u'cc',
+      'structuredOut': '{}',
+      'reportResult': False,
+      'roleCommand': u'INSTALL',
+      'serviceName': u'HDFS',
+      'role': u'HBASE',
+      'actionId': '1-1',
+      'taskId': 7,
+      'exitcode': 777}
+    self.assertEqual(inprogress, expected)
+
+    self.auto_hbase_install_command['auto_generated'] = False
+    inprogress = csd.generate_in_progress_report(self.auto_hbase_install_command, report)
+    expected['reportResult'] = True
+    self.assertEqual(inprogress, expected)
+    pass
+
+if __name__ == "__main__":
+  logging.basicConfig(format='%(asctime)s %(message)s',level=logging.DEBUG)
+  unittest.main()
+
+
+
+
diff --git a/slider-agent/src/test/python/agent/TestController.py b/slider-agent/src/test/python/agent/TestController.py
index 939e63f..401d69a 100644
--- a/slider-agent/src/test/python/agent/TestController.py
+++ b/slider-agent/src/test/python/agent/TestController.py
@@ -42,7 +42,7 @@
   @patch.object(hostname, "hostname")
   def setUp(self, hostname_method, NetUtil_mock, lockMock, threadMock):
 
-    Controller.logger = MagicMock()
+    #Controller.logger = MagicMock()
     lockMock.return_value = MagicMock()
     NetUtil_mock.return_value = MagicMock()
     hostname_method.return_value = "test_hostname"
@@ -55,6 +55,7 @@
     self.controller = Controller.Controller(config)
     self.controller.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS = 0.1
     self.controller.netutil.HEARTBEAT_NOT_IDDLE_INTERVAL_SEC = 0.1
+    self.controller.actionQueue = ActionQueue.ActionQueue(config, self.controller)
 
 
   @patch("json.dumps")
@@ -152,9 +153,9 @@
 
   @patch("urllib2.build_opener")
   @patch("urllib2.install_opener")
-  @patch.object(ActionQueue.ActionQueue, "run")
+  @patch.object(ActionQueue.ActionQueue, "start")
   def test_repeatRegistration(self,
-                              run_mock, installMock, buildMock):
+                              start_mock, installMock, buildMock):
 
     registerAndHeartbeat = MagicMock(name="registerAndHeartbeat")
 
@@ -162,6 +163,7 @@
     self.controller.run()
     self.assertTrue(installMock.called)
     self.assertTrue(buildMock.called)
+    self.assertTrue(start_mock.called)
     self.controller.registerAndHeartbeat.assert_called_once_with()
 
     calls = []
@@ -288,7 +290,7 @@
     self.controller.sendRequest = sendRequest
 
     self.controller.responseId = 1
-    response = {"responseId":"2", "restartAgent":"false"}
+    response = {"responseId":"2", "restartAgent": False}
     loadsMock.return_value = response
 
     def one_heartbeat(*args, **kwargs):
@@ -588,9 +590,139 @@
     self.controller.config = original_value
     pass
 
+  def test_create_start_command(self):
+    stored_command = {
+      'commandType': 'EXECUTION_COMMAND',
+      'role': u'HBASE_MASTER',
+      "componentName": "HBASE_MASTER",
+      'roleCommand': u'INSTALL',
+      'commandId': '1-1',
+      'taskId': 3,
+      'clusterName': u'cc',
+      'serviceName': u'HBASE',
+      'configurations': {'global': {}},
+      'configurationTags': {'global': {'tag': 'v1'}},
+      'auto_generated': False,
+      'roleParams': {'auto_restart':'false'},
+      'commandParams': {'script_type': 'PYTHON',
+                        'script': 'scripts/abc.py',
+                        'command_timeout': '600'}
+    }
+
+    expected = {
+      'commandType': 'EXECUTION_COMMAND',
+      'role': u'HBASE_MASTER',
+      "componentName": "HBASE_MASTER",
+      'roleCommand': u'INSTALL',
+      'commandId': '4-1',
+      'taskId': 4,
+      'clusterName': u'cc',
+      'serviceName': u'HBASE',
+      'configurations': {'global': {}},
+      'configurationTags': {'global': {'tag': 'v1'}},
+      'auto_generated': False,
+      'roleParams': {'auto_restart':'false'},
+      'commandParams': {'script_type': 'PYTHON',
+                        'script': 'scripts/abc.py',
+                        'command_timeout': '600'},
+      'auto_generated': True
+    }
+
+    modified_command = self.controller.create_start_command(stored_command)
+    self.assertEqual.__self__.maxDiff = None
+    self.assertEqual(modified_command, expected)
+
+  @patch.object(Controller.Controller, "createStatusCommand")
+  @patch.object(threading._Event, "wait")
+  @patch("time.sleep")
+  @patch("json.loads")
+  @patch("json.dumps")
+  def test_auto_start(self, dumpsMock, loadsMock, timeMock, waitMock, mock_createStatusCommand):
+    original_value = self.controller.config
+    self.controller.config = AgentConfig("", "")
+    out = StringIO.StringIO()
+    sys.stdout = out
+
+    heartbeat = MagicMock()
+    self.controller.heartbeat = heartbeat
+
+    dumpsMock.return_value = "data"
+
+    sendRequest = MagicMock(name="sendRequest")
+    self.controller.sendRequest = sendRequest
+
+    self.controller.responseId = 1
+    response1 = {"responseId": "2", "restartAgent": False, "restartEnabled": True}
+    response2 = {"responseId": "2", "restartAgent": False, "restartEnabled": False}
+    loadsMock.side_effect = [response1, response2, response1]
+
+    def one_heartbeat(*args, **kwargs):
+      self.controller.DEBUG_STOP_HEARTBEATING = True
+      return "data"
+
+    sendRequest.side_effect = one_heartbeat
+
+    actionQueue = MagicMock()
+    actionQueue.isIdle.return_value = True
+
+    # one successful request, after stop
+    self.controller.actionQueue = actionQueue
+    self.controller.componentActualState = State.FAILED
+    self.controller.componentExpectedState = State.STARTED
+    self.assertTrue(self.controller.componentActualState, State.FAILED)
+    self.controller.actionQueue.customServiceOrchestrator.stored_command = {
+      'commandType': 'EXECUTION_COMMAND',
+      'role': u'HBASE',
+      'roleCommand': u'START',
+      'commandId': '7-1',
+      'taskId': 7,
+      "componentName": "HBASE_MASTER",
+      'clusterName': u'cc',
+      'serviceName': u'HDFS'
+    }
+    addToQueue = MagicMock(name="addToQueue")
+    self.controller.addToQueue = addToQueue
+
+    self.controller.heartbeatWithServer()
+    self.assertTrue(sendRequest.called)
+
+    self.assertTrue(self.controller.componentActualState, State.STARTING)
+    self.assertTrue(self.controller.componentExpectedState, State.STARTED)
+    self.assertEquals(self.controller.failureCount, 0)
+    self.assertFalse(mock_createStatusCommand.called)
+    addToQueue.assert_has_calls([call([{
+      'commandType': 'EXECUTION_COMMAND',
+      'clusterName': u'cc',
+      'serviceName': u'HDFS',
+      'role': u'HBASE',
+      'taskId': 8,
+      'roleCommand': u'START',
+      'componentName': 'HBASE_MASTER',
+      'commandId': '8-1',
+      'auto_generated': True}])])
+    self.controller.config = original_value
+
+    # restartEnabled = False
+    self.controller.componentActualState = State.FAILED
+    self.controller.heartbeatWithServer()
+
+    self.assertTrue(sendRequest.called)
+    self.assertTrue(self.controller.componentActualState, State.FAILED)
+    self.assertTrue(self.controller.componentExpectedState, State.STARTED)
+
+    # restartEnabled = True
+    self.controller.componentActualState = State.INSTALLED
+    self.controller.componentExpectedState = State.INSTALLED
+    self.controller.heartbeatWithServer()
+
+    self.assertTrue(sendRequest.called)
+    self.assertTrue(self.controller.componentActualState, State.INSTALLED)
+    self.assertTrue(self.controller.componentExpectedState, State.INSTALLED)
+    pass
+
 
 if __name__ == "__main__":
-  logging.basicConfig(format='%(asctime)s %(message)s',level=logging.DEBUG)
+  logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
   unittest.main()
 
 
diff --git a/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py b/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
index d2439b1..e545afe 100644
--- a/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
+++ b/slider-agent/src/test/python/agent/TestCustomServiceOrchestrator.py
@@ -34,9 +34,11 @@
 from mock.mock import MagicMock, patch
 import StringIO
 import sys
+from socket import socket
 
 
 class TestCustomServiceOrchestrator(TestCase):
+
   def setUp(self):
     # disable stdout
     out = StringIO.StringIO()
@@ -178,13 +180,13 @@
     pass
 
 
-  @patch.object(CustomServiceOrchestrator, "allocate_port")
+  @patch.object(CustomServiceOrchestrator, "allocate_ports")
   @patch.object(CustomServiceOrchestrator, "resolve_script_path")
   @patch.object(PythonExecutor, "run_file")
   def test_runCommand_get_port(self,
                                run_file_mock,
                                resolve_script_path_mock,
-                               allocate_port_mock):
+                               allocate_ports_mock):
     command = {
       'role': 'HBASE_REGIONSERVER',
       'hostLevelParams': {
@@ -212,7 +214,7 @@
     config.getWorkRootPath.return_value = tempdir
     config.getLogPath.return_value = tempdir
 
-    allocate_port_mock.return_value = 10233
+    allocate_ports_mock.return_value = str(10233)
 
     resolve_script_path_mock.return_value = "/basedir/scriptpath"
     dummy_controller = MagicMock()
@@ -225,11 +227,119 @@
     }
     ret = orchestrator.runCommand(command, "out.txt", "err.txt")
     self.assertEqual(ret['exitcode'], 0)
-    self.assertEqual(ret['allocated_ports'], {'a.port': '10233'})
+    self.assertEqual(ret['allocated_ports'], {'a.a.port': '10233'})
     self.assertTrue(run_file_mock.called)
     self.assertEqual(run_file_mock.call_count, 1)
+    self.assertEqual(orchestrator.allocated_ports, {'a.a.port': '10233'})
+    self.assertEqual(orchestrator.stored_command, {})
 
 
+  @patch.object(socket, "close")
+  @patch.object(socket, "connect")
+  def test_allocate_port_def(self, socket_connect_mock, socket_close_mock):
+    e = OSError()
+    socket_connect_mock.side_effect = e
+    tempdir = tempfile.gettempdir()
+    config = MagicMock()
+    config.get.return_value = "something"
+    config.getResolvedPath.return_value = tempdir
+    config.getWorkRootPath.return_value = tempdir
+    config.getLogPath.return_value = tempdir
+
+    dummy_controller = MagicMock()
+    orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+    ret = orchestrator.allocate_port(10)
+    self.assertEqual(ret, 10)
+
+  @patch.object(socket, "getsockname")
+  @patch.object(socket, "bind")
+  @patch.object(socket, "close")
+  @patch.object(socket, "connect")
+  def test_allocate_port_new(self, socket_connect_mock, socket_close_mock,
+                         socket_bind_mock, socket_getsockname_mock):
+    tempdir = tempfile.gettempdir()
+    config = MagicMock()
+    config.get.return_value = "something"
+    config.getResolvedPath.return_value = tempdir
+    config.getWorkRootPath.return_value = tempdir
+    config.getLogPath.return_value = tempdir
+
+    dummy_controller = MagicMock()
+    orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+    socket_getsockname_mock.return_value = [100, 101]
+    ret = orchestrator.allocate_port(10)
+    self.assertEqual(ret, 101)
+
+  @patch.object(socket, "getsockname")
+  @patch.object(socket, "bind")
+  def test_allocate_port_no_def(self, socket_bind_mock, socket_getsockname_mock):
+    tempdir = tempfile.gettempdir()
+    config = MagicMock()
+    config.get.return_value = "something"
+    config.getResolvedPath.return_value = tempdir
+    config.getWorkRootPath.return_value = tempdir
+    config.getLogPath.return_value = tempdir
+
+    dummy_controller = MagicMock()
+    orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+    socket_getsockname_mock.return_value = [100, 102]
+    ret = orchestrator.allocate_port()
+    self.assertEqual(ret, 102)
+
+
+  @patch.object(CustomServiceOrchestrator, "is_port_available")
+  @patch.object(CustomServiceOrchestrator, "allocate_port")
+  def test_allocate_port_combinations(self, allocate_port_mock, is_port_available_mock):
+    tempdir = tempfile.gettempdir()
+    config = MagicMock()
+    config.get.return_value = "something"
+    config.getResolvedPath.return_value = tempdir
+    config.getWorkRootPath.return_value = tempdir
+    config.getLogPath.return_value = tempdir
+
+    dummy_controller = MagicMock()
+    orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+
+    is_port_available_mock.return_value = False
+    allocate_port_mock.side_effect = [101, 102, 103, 104, 105, 106]
+    ret = orchestrator.allocate_ports("1000", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "1000")
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "101")
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT},${A.ALLOCATED_PORT}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "102,103")
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_0}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "104")
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_0}{DO_NOT_PROPAGATE}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "105")
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DO_NOT_PROPAGATE}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "106")
+
+
+  @patch.object(CustomServiceOrchestrator, "is_port_available")
+  def test_allocate_port_combinations2(self, is_port_available_mock):
+    tempdir = tempfile.gettempdir()
+    config = MagicMock()
+    config.get.return_value = "something"
+    config.getResolvedPath.return_value = tempdir
+    config.getWorkRootPath.return_value = tempdir
+    config.getLogPath.return_value = tempdir
+
+    dummy_controller = MagicMock()
+    orchestrator = CustomServiceOrchestrator(config, dummy_controller)
+
+    is_port_available_mock.return_value = True
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1005}", "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "1005")
+
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1005}-${A.ALLOCATED_PORT}{DEFAULT_1006}",
+                                      "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "1005-1006")
+
+    ret = orchestrator.allocate_ports("${A.ALLOCATED_PORT}{DEFAULT_1006}{DO_NOT_PROPAGATE}",
+                                      "${A.ALLOCATED_PORT}")
+    self.assertEqual(ret, "1006")
+
   @patch("hostname.public_hostname")
   @patch("os.path.isfile")
   @patch("os.unlink")
@@ -308,9 +418,10 @@
     }
 
     ret = orchestrator.runCommand(command, "out.txt", "err.txt", True, True)
+    self.assertEqual.__self__.maxDiff = None
     self.assertEqual(ret['exitcode'], 0)
     self.assertTrue(run_file_mock.called)
-    self.assertEqual(orchestrator.applied_configs, expected)
+    self.assertEqual(orchestrator.stored_command, command)
 
     ret = orchestrator.requestComponentStatus(command_get)
     self.assertEqual(ret['configurations'], expected)
@@ -353,8 +464,8 @@
     status = orchestrator.requestComponentStatus(status_command)
     self.assertEqual(CustomServiceOrchestrator.DEAD_STATUS, status['exitcode'])
 
-  @patch.object(CustomServiceOrchestrator, "allocate_port")
-  def test_finalize_command(self, mock_allocate_port):
+  @patch.object(CustomServiceOrchestrator, "allocate_ports")
+  def test_finalize_command(self, mock_allocate_ports):
     dummy_controller = MagicMock()
     tempdir = tempfile.gettempdir()
     tempWorkDir = tempdir + "W"
@@ -363,7 +474,7 @@
     config.getResolvedPath.return_value = tempdir
     config.getWorkRootPath.return_value = tempWorkDir
     config.getLogPath.return_value = tempdir
-    mock_allocate_port.return_value = "10023"
+    mock_allocate_ports.return_value = "10023"
 
     orchestrator = CustomServiceOrchestrator(config, dummy_controller)
     command = {}
@@ -377,16 +488,24 @@
     command['configurations']['oozie-site'] = {}
     command['configurations']['oozie-site']['log_root'] = "${AGENT_LOG_ROOT}"
     command['configurations']['oozie-site']['a_port'] = "${HBASE_MASTER.ALLOCATED_PORT}"
+    command['configurations']['oozie-site']['ignore_port1'] = "[${HBASE_RS.ALLOCATED_PORT}]"
+    command['configurations']['oozie-site']['ignore_port2'] = "[${HBASE_RS.ALLOCATED_PORT},${HBASE_REST.ALLOCATED_PORT}{DO_NOT_PROPAGATE}]"
+    command['configurations']['oozie-site']['ignore_port3'] = "[${HBASE_RS.ALLOCATED_PORT}{a}{b}{c},${A.ALLOCATED_PORT}{DO_NOT_PROPAGATE},${A.ALLOCATED_PORT}{DEFAULT_3}{DO_NOT_PROPAGATE}]"
+    command['configurations']['oozie-site']['ignore_port4'] = "${HBASE_RS}{a}{b}{c}"
 
     allocated_ports = {}
     orchestrator.finalize_command(command, False, allocated_ports)
     self.assertEqual(command['configurations']['hbase-site']['work_root'], tempWorkDir)
     self.assertEqual(command['configurations']['oozie-site']['log_root'], tempdir)
     self.assertEqual(command['configurations']['oozie-site']['a_port'], "10023")
-    self.assertEqual(orchestrator.applied_configs, {})
+    self.assertEqual(command['configurations']['oozie-site']['ignore_port1'], "[0]")
+    self.assertEqual(command['configurations']['oozie-site']['ignore_port2'], "[0,0]")
+    self.assertEqual(command['configurations']['oozie-site']['ignore_port3'], "[0,0,0]")
+    self.assertEqual(command['configurations']['oozie-site']['ignore_port4'], "${HBASE_RS}{a}{b}{c}")
+    self.assertEqual(orchestrator.stored_command, {})
     self.assertEqual(len(allocated_ports), 1)
-    self.assertTrue('a_port' in allocated_ports)
-    self.assertEqual(allocated_ports['a_port'], '10023')
+    self.assertTrue('oozie-site.a_port' in allocated_ports)
+    self.assertEqual(allocated_ports['oozie-site.a_port'], '10023')
 
     command['configurations']['hbase-site']['work_root'] = "${AGENT_WORK_ROOT}"
     command['configurations']['hbase-site']['log_root'] = "${AGENT_LOG_ROOT}/log"
@@ -397,8 +516,8 @@
     orchestrator.finalize_command(command, True, {})
     self.assertEqual(command['configurations']['hbase-site']['log_root'], tempdir + "/log")
     self.assertEqual(command['configurations']['hbase-site']['blog_root'], "/b/" + tempdir + "/log")
-    self.assertEqual(command['configurations']['oozie-site']['b_port'], "${HBASE_REGIONSERVER.ALLOCATED_PORT}")
-    self.assertEqual(orchestrator.applied_configs, command['configurations'])
+    self.assertEqual(command['configurations']['oozie-site']['b_port'], "0")
+    self.assertEqual(orchestrator.stored_command, command)
 
 
   def test_port_allocation(self):
diff --git a/slider-agent/src/test/python/agent/TestGrep.py b/slider-agent/src/test/python/agent/TestGrep.py
index 75f0093..351befb 100644
--- a/slider-agent/src/test/python/agent/TestGrep.py
+++ b/slider-agent/src/test/python/agent/TestGrep.py
@@ -19,7 +19,7 @@
 '''
 
 from unittest import TestCase
-from agent.Grep import Grep
+from Grep import Grep
 import socket
 import os, sys
 import logging
diff --git a/slider-agent/src/test/python/agent/TestHeartbeat.py b/slider-agent/src/test/python/agent/TestHeartbeat.py
index b60c14c..b012218 100644
--- a/slider-agent/src/test/python/agent/TestHeartbeat.py
+++ b/slider-agent/src/test/python/agent/TestHeartbeat.py
@@ -30,7 +30,7 @@
 import StringIO
 import sys
 import logging
-
+from Controller import State
 
 class TestHeartbeat(TestCase):
   def setUp(self):
@@ -64,7 +64,7 @@
     self.assertEquals(result['nodeStatus']['cause'], "NONE")
     self.assertEquals(result['nodeStatus']['status'], "HEALTHY")
     # result may or may NOT have an agentEnv structure in it
-    self.assertEquals((len(result) is 5) or (len(result) is 6), True)
+    self.assertEquals((len(result) is 6) or (len(result) is 7), True)
     self.assertEquals(not heartbeat.reports, True,
                       "Heartbeat should not contain task in progress")
 
@@ -85,7 +85,8 @@
                    'role': u'DATANODE',
                    'actionId': '1-1',
                    'taskId': 3,
-                   'exitcode': 777},
+                   'exitcode': 777,
+                   'reportResult' : True},
 
                   {'status': 'COMPLETED',
                    'stderr': 'stderr',
@@ -96,7 +97,8 @@
                    'role': 'role',
                    'actionId': 17,
                    'taskId': 'taskId',
-                   'exitcode': 0},
+                   'exitcode': 0,
+                   'reportResult' : True},
 
                   {'status': 'FAILED',
                    'stderr': 'stderr',
@@ -107,7 +109,8 @@
                    'role': u'DATANODE',
                    'actionId': '1-1',
                    'taskId': 3,
-                   'exitcode': 13},
+                   'exitcode': 13,
+                   'reportResult' : True},
 
                   {'status': 'COMPLETED',
                    'stderr': 'stderr',
@@ -119,8 +122,21 @@
                    'role': u'DATANODE',
                    'actionId': '1-1',
                    'taskId': 3,
-                   'exitcode': 0}
+                   'exitcode': 0,
+                   'reportResult' : True},
 
+                  {'status': 'COMPLETED',
+                   'stderr': 'stderr',
+                   'stdout': 'out',
+                   'clusterName': u'cc',
+                   'configurationTags': {'global': {'tag': 'v1'}},
+                   'roleCommand': u'INSTALL',
+                   'serviceName': u'HDFS',
+                   'role': u'DATANODE',
+                   'actionId': '1-1',
+                   'taskId': 3,
+                   'exitcode': 0,
+                   'reportResult' : False}
       ],
       'componentStatus': [
         {'status': 'HEALTHY', 'componentName': 'DATANODE', 'reportResult' : True},
@@ -129,6 +145,7 @@
       ],
     }
     heartbeat = Heartbeat(actionQueue, config)
+    # State.STARTED causes agentState to be set to 4 (its enum ordinal)
     hb = heartbeat.build({}, 10)
     hb['hostname'] = 'hostname'
     hb['timestamp'] = 'timestamp'
@@ -157,8 +174,84 @@
        'stderr': 'stderr'}],  'componentStatus': [
       {'status': 'HEALTHY', 'componentName': 'DATANODE'},
       {'status': 'UNHEALTHY', 'componentName': 'NAMENODE'}]}
+    self.assertEqual.__self__.maxDiff = None
     self.assertEquals(hb, expected)
 
+  @patch.object(ActionQueue, "result")
+  def test_build_result2(self, result_mock):
+    config = AgentConfig("", "")
+    config.set('agent', 'prefix', 'tmp')
+    dummy_controller = MagicMock()
+    actionQueue = ActionQueue(config, dummy_controller)
+    result_mock.return_value = {
+      'reports': [{'status': 'IN_PROGRESS',
+                   'stderr': 'Read from /tmp/errors-3.txt',
+                   'stdout': 'Read from /tmp/output-3.txt',
+                   'clusterName': u'cc',
+                   'roleCommand': u'INSTALL',
+                   'serviceName': u'HDFS',
+                   'role': u'DATANODE',
+                   'actionId': '1-1',
+                   'taskId': 3,
+                   'exitcode': 777,
+                   'reportResult' : False}
+      ],
+      'componentStatus': []
+      }
+    heartbeat = Heartbeat(actionQueue, config)
+
+    commandResult = {}
+    hb = heartbeat.build(commandResult, 10)
+    hb['hostname'] = 'hostname'
+    hb['timestamp'] = 'timestamp'
+    hb['fqdn'] = 'fqdn'
+    expected = {'nodeStatus':
+                  {'status': 'HEALTHY',
+                   'cause': 'NONE'},
+                'timestamp': 'timestamp', 'hostname': 'hostname', 'fqdn': 'fqdn',
+                'responseId': 10, 'reports': []}
+    self.assertEqual.__self__.maxDiff = None
+    self.assertEquals(hb, expected)
+    self.assertEquals(commandResult, {'commandStatus': 'IN_PROGRESS'})
+
+  @patch.object(ActionQueue, "result")
+  def test_build_result3(self, result_mock):
+    config = AgentConfig("", "")
+    config.set('agent', 'prefix', 'tmp')
+    dummy_controller = MagicMock()
+    actionQueue = ActionQueue(config, dummy_controller)
+    result_mock.return_value = {
+      'reports': [{'status': 'COMPLETED',
+                   'stderr': 'Read from /tmp/errors-3.txt',
+                   'stdout': 'Read from /tmp/output-3.txt',
+                   'clusterName': u'cc',
+                   'roleCommand': u'INSTALL',
+                   'serviceName': u'HDFS',
+                   'role': u'DATANODE',
+                   'actionId': '1-1',
+                   'taskId': 3,
+                   'exitcode': 777,
+                   'reportResult' : False}
+      ],
+      'componentStatus': []
+    }
+    heartbeat = Heartbeat(actionQueue, config)
+
+    commandResult = {}
+    hb = heartbeat.build(commandResult, 10)
+    hb['hostname'] = 'hostname'
+    hb['timestamp'] = 'timestamp'
+    hb['fqdn'] = 'fqdn'
+    expected = {'nodeStatus':
+                  {'status': 'HEALTHY',
+                   'cause': 'NONE'},
+                'timestamp': 'timestamp', 'hostname': 'hostname', 'fqdn': 'fqdn',
+                'responseId': 10, 'reports': []}
+    self.assertEqual.__self__.maxDiff = None
+    self.assertEquals(hb, expected)
+    self.assertEquals(commandResult, {'commandStatus': 'COMPLETED'})
+
+
 
 if __name__ == "__main__":
   logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
diff --git a/slider-agent/src/test/python/agent/TestMain.py b/slider-agent/src/test/python/agent/TestMain.py
index 9ef1cad..bc68582 100644
--- a/slider-agent/src/test/python/agent/TestMain.py
+++ b/slider-agent/src/test/python/agent/TestMain.py
@@ -20,19 +20,20 @@
 import StringIO
 import sys
 
-from agent import NetUtil, security
+import NetUtil, security
 from mock.mock import MagicMock, patch, ANY
 import unittest
-from agent import ProcessHelper, main
+import ProcessHelper, main
 import logging
 import signal
-from agent.AgentConfig import AgentConfig
+from AgentConfig import AgentConfig
 import ConfigParser
 import os
 import tempfile
-from agent.Controller import Controller
+from Controller import Controller
 from optparse import OptionParser
 
+logger = logging.getLogger()
 
 class TestMain(unittest.TestCase):
   def setUp(self):
@@ -148,8 +149,7 @@
   @patch("sys.exit")
   @patch("os.path.isfile")
   @patch("os.path.isdir")
-  @patch("hostname.hostname")
-  def test_perform_prestart_checks(self, hostname_mock, isdir_mock, isfile_mock,
+  def test_perform_prestart_checks(self, isdir_mock, isfile_mock,
                                    exit_mock, remove_mock):
     main.config = AgentConfig("", "")
 
@@ -221,7 +221,6 @@
 
   @patch.object(main, "setup_logging")
   @patch.object(main, "bind_signal_handlers")
-  @patch.object(main, "stop_agent")
   @patch.object(main, "update_config_from_file")
   @patch.object(main, "perform_prestart_checks")
   @patch.object(main, "write_pid")
@@ -231,13 +230,15 @@
   @patch.object(Controller, "start")
   @patch.object(Controller, "join")
   @patch("optparse.OptionParser.parse_args")
-  def test_main(self, parse_args_mock, join_mock, start_mock,
+  @patch.object(Controller, "is_alive")
+  def test_main(self, isAlive_mock, parse_args_mock, join_mock, start_mock,
                 Controller_init_mock, try_to_connect_mock,
                 update_log_level_mock, write_pid_mock,
                 perform_prestart_checks_mock,
-                update_config_from_file_mock, stop_mock,
+                update_config_from_file_mock,
                 bind_signal_handlers_mock, setup_logging_mock):
     Controller_init_mock.return_value = None
+    isAlive_mock.return_value = False
     options = MagicMock()
     parse_args_mock.return_value = (options, MagicMock)
 
@@ -245,25 +246,24 @@
 
     #testing call without command-line arguments
     os.environ["AGENT_WORK_ROOT"] = os.path.join(tmpdir, "work")
-    os.environ["AGENT_LOG_ROOT"] = os.path.join(tmpdir, "log")
+    os.environ["AGENT_LOG_ROOT"] = ",".join([os.path.join(tmpdir, "log"),os.path.join(tmpdir, "log2")])
     main.main()
 
     self.assertTrue(setup_logging_mock.called)
     self.assertTrue(bind_signal_handlers_mock.called)
-    self.assertTrue(stop_mock.called)
     self.assertTrue(update_config_from_file_mock.called)
     self.assertTrue(perform_prestart_checks_mock.called)
     self.assertTrue(write_pid_mock.called)
     self.assertTrue(update_log_level_mock.called)
+    self.assertTrue(options.log_folder == os.path.join(tmpdir, "log"))
     try_to_connect_mock.assert_called_once_with(ANY, -1, ANY)
     self.assertTrue(start_mock.called)
 
   class AgentOptions:
-      def __init__(self, label, host, port, secured_port, verbose, debug):
+      def __init__(self, label, zk_quorum, zk_reg_path, verbose, debug):
           self.label = label
-          self.host = host
-          self.port = port
-          self.secured_port = secured_port
+          self.zk_quorum = zk_quorum
+          self.zk_reg_path = zk_reg_path
           self.verbose = verbose
           self.debug = debug
 
@@ -281,7 +281,7 @@
   @patch.object(Controller, "join")
   @patch.object(Controller, "is_alive")
   @patch("optparse.OptionParser.parse_args")
-  def test_main(self, parse_args_mock, isAlive_mock, join_mock, start_mock,
+  def test_main2(self, parse_args_mock, isAlive_mock, join_mock, start_mock,
                 Controller_init_mock, AgentConfig_set_mock,
                 try_to_connect_mock,
                 update_log_level_mock, write_pid_mock,
@@ -291,18 +291,18 @@
       Controller_init_mock.return_value = None
       isAlive_mock.return_value = False
       parse_args_mock.return_value = (
-          TestMain.AgentOptions("agent", "host1", "8080", "8081", True, ""), [])
+          TestMain.AgentOptions("agent", "host1:2181", "/registry/org-apache-slider/cl1", True, ""), [])
       tmpdir = tempfile.gettempdir()
 
       #testing call without command-line arguments
       os.environ["AGENT_WORK_ROOT"] = os.path.join(tmpdir, "work")
       os.environ["AGENT_LOG_ROOT"] = os.path.join(tmpdir, "log")
       main.main()
-      self.assertTrue(AgentConfig_set_mock.call_count == 4)
-      AgentConfig_set_mock.assert_any_call("server", "hostname", "host1")
-      AgentConfig_set_mock.assert_any_call("server", "port", "8080")
-      AgentConfig_set_mock.assert_any_call("server", "secured_port", "8081")
+      self.assertTrue(AgentConfig_set_mock.call_count == 3)
+      AgentConfig_set_mock.assert_any_call("server", "zk_quorum", "host1:2181")
+      AgentConfig_set_mock.assert_any_call("server", "zk_reg_path", "/registry/org-apache-slider/cl1")
 
 
 if __name__ == "__main__":
+  logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
   unittest.main()
\ No newline at end of file
diff --git a/slider-agent/src/test/python/agent/TestRegistration.py b/slider-agent/src/test/python/agent/TestRegistration.py
index 356e480..f91fe29 100644
--- a/slider-agent/src/test/python/agent/TestRegistration.py
+++ b/slider-agent/src/test/python/agent/TestRegistration.py
@@ -25,8 +25,9 @@
 import tempfile
 from mock.mock import patch
 from mock.mock import MagicMock
-from agent.Register import Register
-from agent.AgentConfig import AgentConfig
+from Register import Register
+from Controller import State
+from AgentConfig import AgentConfig
 
 class TestRegistration(TestCase):
 
@@ -47,20 +48,23 @@
       text_file.write("1.3.0")
 
     register = Register(config)
-    data = register.build(1)
+    data = register.build(State.INIT, State.INIT, {}, 1)
     #print ("Register: " + pprint.pformat(data))
     self.assertEquals(data['hostname'] != "", True, "hostname should not be empty")
     self.assertEquals(data['publicHostname'] != "", True, "publicHostname should not be empty")
     self.assertEquals(data['responseId'], 1)
     self.assertEquals(data['timestamp'] > 1353678475465L, True, "timestamp should not be empty")
     self.assertEquals(data['agentVersion'], '1.3.0', "agentVersion should not be empty")
-    self.assertEquals(len(data), 5)
+    self.assertEquals(data['actualState'], State.INIT, "actualState should not be empty")
+    self.assertEquals(data['expectedState'], State.INIT, "expectedState should not be empty")
+    self.assertEquals(data['allocatedPorts'], {}, "allocatedPorts should be empty")
+    self.assertEquals(len(data), 8)
 
     self.assertEquals(os.path.join(tmpdir, "app/definition"), config.getResolvedPath("app_pkg_dir"))
     self.assertEquals(os.path.join(tmpdir, "app/install"), config.getResolvedPath("app_install_dir"))
-    self.assertEquals(os.path.join(ver_dir, "app/log"), config.getResolvedPath("app_log_dir"))
-    self.assertEquals(os.path.join(ver_dir, "infra/log"), config.getResolvedPath("log_dir"))
-    self.assertEquals(os.path.join(ver_dir, "app/command-log"), config.getResolvedPath("app_task_dir"))
+    self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("app_log_dir"))
+    self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("log_dir"))
+    self.assertEquals(os.path.join(ver_dir, "."), config.getResolvedPath("app_task_dir"))
 
     os.remove(ver_file)
     os.removedirs(ver_dir)
diff --git a/slider-agent/src/test/python/agent/TestShell.py b/slider-agent/src/test/python/agent/TestShell.py
index 32a8d11..8caed7b 100644
--- a/slider-agent/src/test/python/agent/TestShell.py
+++ b/slider-agent/src/test/python/agent/TestShell.py
@@ -26,36 +26,47 @@
 from agent import shell
 from sys import platform as _platform
 import subprocess, time
+import sys
+import platform
 
 class TestShell(unittest.TestCase):
+  unsupported_for_test = []
 
+  def linux_distribution(self):
+    PYTHON_VER = sys.version_info[0] * 10 + sys.version_info[1]
 
-  @patch("os.setuid")
-  def test_changeUid(self, os_setUIDMock):
-    shell.threadLocal.uid = 9999
-    shell.changeUid()
-    self.assertTrue(os_setUIDMock.called)
+    if PYTHON_VER < 26:
+      linux_dist = platform.dist()
+    else:
+      linux_dist = platform.linux_distribution()
+
+    return linux_dist
 
 
   def test_kill_process_with_children(self):
+    dist = self.linux_distribution()
+    operatingSystem = dist[0].lower()
+    if operatingSystem in self.unsupported_for_test:
+      return
+
     if _platform == "linux" or _platform == "linux2": # Test is Linux-specific
       gracefull_kill_delay_old = shell.gracefull_kill_delay
       shell.gracefull_kill_delay = 0.1
-      sleep_cmd = "sleep 314159265"
-      test_cmd = """ (({0}) | ({0} | {0})) """.format(sleep_cmd)
+      sleep_cmd = "sleep 10"
+      test_cmd = """ (({0}) & ({0} & {0})) """.format(sleep_cmd)
       # Starting process tree (multiple process groups)
-      test_process = subprocess.Popen(test_cmd, shell=True)
+      test_process = subprocess.Popen(test_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
       time.sleep(0.3) # Delay to allow subprocess to start
       # Check if processes are running
-      ps_cmd = """ps aux | grep "{0}" | grep -v grep """.format(sleep_cmd)
-      ps_process = subprocess.Popen(ps_cmd, stdout=subprocess.PIPE, shell=True)
+      ps_cmd = """ps aux """
+      ps_process = subprocess.Popen(ps_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
       (out, err) = ps_process.communicate()
       self.assertTrue(sleep_cmd in out)
       # Kill test process
       shell.kill_process_with_children(test_process.pid)
       test_process.communicate()
       # Now test process should not be running
-      ps_process = subprocess.Popen(ps_cmd, stdout=subprocess.PIPE, shell=True)
+      ps_process = subprocess.Popen(ps_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
       (out, err) = ps_process.communicate()
       self.assertFalse(sleep_cmd in out)
       shell.gracefull_kill_delay = gracefull_kill_delay_old
diff --git a/slider-agent/src/test/python/resource_management/TestDirectoryResource.py b/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
index 866486e..d9a262c 100644
--- a/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
+++ b/slider-agent/src/test/python/resource_management/TestDirectoryResource.py
@@ -26,7 +26,7 @@
 from resource_management.core.resources import Directory
 
 @patch.object(System, "os_family", new = 'redhat')
-class TestFileResource(TestCase):
+class TestDirectoryResource(TestCase):
   
   @patch.object(os.path, "exists")
   @patch.object(os, "makedirs")
diff --git a/slider-agent/src/test/python/resource_management/TestGroupResource.py b/slider-agent/src/test/python/resource_management/TestGroupResource.py.disabled
similarity index 100%
rename from slider-agent/src/test/python/resource_management/TestGroupResource.py
rename to slider-agent/src/test/python/resource_management/TestGroupResource.py.disabled
diff --git a/slider-agent/src/test/python/resource_management/TestUserResource.py b/slider-agent/src/test/python/resource_management/TestUserResource.py.disabled
similarity index 100%
rename from slider-agent/src/test/python/resource_management/TestUserResource.py
rename to slider-agent/src/test/python/resource_management/TestUserResource.py.disabled
diff --git a/slider-agent/src/test/python/unitTests.py b/slider-agent/src/test/python/unitTests.py
index 0d822fd..e3f2d7c 100644
--- a/slider-agent/src/test/python/unitTests.py
+++ b/slider-agent/src/test/python/unitTests.py
@@ -54,7 +54,8 @@
   for directory in os.listdir(src_dir):
     if os.path.isdir(directory) and not directory in ignoredDirs:
       files_list += os.listdir(src_dir + os.sep + directory)
-  shuffle(files_list)
+  ## temporarily disabled to keep test discovery order predictable
+  ## shuffle(files_list)
   tests_list = []
 
   logger.info('------------------------TESTS LIST:-------------------------------------')
diff --git a/slider-assembly/pom.xml b/slider-assembly/pom.xml
index 0ec91dd..fab8230 100644
--- a/slider-assembly/pom.xml
+++ b/slider-assembly/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
   </parent>
 
 
@@ -141,26 +141,12 @@
         </configuration>
         <executions>
           <execution>
-            <id>clean</id>
-            <goals>
-              <goal>enforce</goal>
-            </goals>
-            <phase>pre-clean</phase>
-          </execution>
-          <execution>
             <id>default</id>
             <goals>
               <goal>enforce</goal>
             </goals>
             <phase>validate</phase>
           </execution>
-          <execution>
-            <id>site</id>
-            <goals>
-              <goal>enforce</goal>
-            </goals>
-            <phase>pre-site</phase>
-          </execution>
         </executions>
       </plugin>
 
@@ -308,9 +294,9 @@
                   <sources>
                     <source>
                       <location>${project.build.directory}/agent</location>
-      <includes>
-        <include>slider-agent.tar.gz</include>
-      </includes>
+                      <includes>
+                        <include>slider-agent.tar.gz</include>
+                      </includes>
                     </source>
                   </sources>
                 </mapping>
diff --git a/slider-install/README.md b/slider-assembly/src/main/bash/README.md
similarity index 100%
rename from slider-install/README.md
rename to slider-assembly/src/main/bash/README.md
diff --git a/slider-install/src/main/bash/slider-client.xml b/slider-assembly/src/main/bash/slider-client.xml
similarity index 100%
rename from slider-install/src/main/bash/slider-client.xml
rename to slider-assembly/src/main/bash/slider-client.xml
diff --git a/slider-install/src/main/bash/slider_destroy b/slider-assembly/src/main/bash/slider_destroy
similarity index 100%
rename from slider-install/src/main/bash/slider_destroy
rename to slider-assembly/src/main/bash/slider_destroy
diff --git a/slider-install/src/main/bash/slider_setup b/slider-assembly/src/main/bash/slider_setup
similarity index 100%
rename from slider-install/src/main/bash/slider_setup
rename to slider-assembly/src/main/bash/slider_setup
diff --git a/slider-install/src/main/bash/slider_setup.conf b/slider-assembly/src/main/bash/slider_setup.conf
similarity index 100%
rename from slider-install/src/main/bash/slider_setup.conf
rename to slider-assembly/src/main/bash/slider_setup.conf
diff --git a/slider-core/pom.xml b/slider-core/pom.xml
index 95a7e71..9a16ab3 100644
--- a/slider-core/pom.xml
+++ b/slider-core/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
   </parent>
 
   <build>
@@ -62,14 +62,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
@@ -317,48 +309,7 @@
       <artifactId>hadoop-minicluster</artifactId>
       <scope>test</scope>
     </dependency>
-<!--
 
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-client</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-server</artifactId>
-      <scope>test</scope>
-    </dependency>
-
--->
-
-<!--
-
-    <dependency>
-      <groupId>org.apache.accumulo</groupId>
-      <artifactId>accumulo-core</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.accumulo</groupId>
-      <artifactId>accumulo-minicluster</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.accumulo</groupId>
-      <artifactId>accumulo-start</artifactId>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.accumulo</groupId>
-      <artifactId>accumulo-trace</artifactId>
-      <scope>test</scope>
-    </dependency>
--->
 
     <dependency>
       <groupId>junit</groupId>
@@ -406,6 +357,11 @@
       <artifactId>commons-logging</artifactId>
     </dependency>
 
+    <dependency>
+      <groupId>com.codahale.metrics</groupId>
+      <artifactId>metrics-core</artifactId>
+    </dependency>
+    
     <!-- ======================================================== -->
     <!-- service registry -->
     <!-- ======================================================== -->
@@ -450,6 +406,11 @@
     </dependency>
 
     <dependency>
+      <groupId>javax.xml.bind</groupId>
+      <artifactId>jaxb-api</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>com.sun.jersey</groupId>
       <artifactId>jersey-client</artifactId>
     </dependency>
@@ -510,8 +471,17 @@
 
     <dependency>
       <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
       <artifactId>jetty-sslengine</artifactId>
-      <scope>compile</scope>
     </dependency>
 
   </dependencies>
diff --git a/slider-core/src/main/java/org/apache/slider/Slider.java b/slider-core/src/main/java/org/apache/slider/Slider.java
index 0d25f00..5fc8618 100644
--- a/slider-core/src/main/java/org/apache/slider/Slider.java
+++ b/slider-core/src/main/java/org/apache/slider/Slider.java
@@ -42,7 +42,7 @@
     //turn the args to a list
     List<String> argsList = Arrays.asList(args);
     //create a new list, as the ArrayList type doesn't push() on an insert
-    List<String> extendedArgs = new ArrayList<>(argsList);
+    List<String> extendedArgs = new ArrayList<String>(argsList);
     //insert the service name
     extendedArgs.add(0, SERVICE_CLASSNAME);
     //now have the service launcher do its work
diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
index d5869a6..d875d66 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java
@@ -165,40 +165,40 @@
    * the Slider AM and the application that it deploys
    */
   public Map<String, String> options =
-    new HashMap<>();
+    new HashMap<String, String>();
 
   /**
    * cluster information
    * This is only valid when querying the cluster status.
    */
   public Map<String, String> info =
-    new HashMap<>();
+    new HashMap<String, String>();
 
   /**
    * Statistics. This is only relevant when querying the cluster status
    */
   public Map<String, Map<String, Integer>> statistics =
-    new HashMap<>();
+    new HashMap<String, Map<String, Integer>>();
 
   /**
    * Instances: role->count
    */
   public Map<String, List<String>> instances =
-    new HashMap<>();
+    new HashMap<String, List<String>>();
 
   /**
    * Role options, 
    * role -> option -> value
    */
   public Map<String, Map<String, String>> roles =
-    new HashMap<>();
+    new HashMap<String, Map<String, String>>();
 
 
   /**
    * List of key-value pairs to add to a client config to set up the client
    */
   public Map<String, String> clientProperties =
-    new HashMap<>();
+    new HashMap<String, String>();
 
   /**
    * Status information
@@ -569,7 +569,7 @@
    */
   @JsonIgnore
   public Set<String> getRoleNames() {
-    return new HashSet<>(roles.keySet());
+    return new HashSet<String>(roles.keySet());
   }
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java b/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
index 7e73a92..21ece2b 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ClusterDescriptionOperations.java
@@ -64,15 +64,15 @@
     MapOperations appOptions =
       aggregateConf.getAppConfOperations().getGlobalOptions();
 
-    cd.type = internalOptions.getOption(OptionKeys.INTERNAL_PROVIDER_NAME,
+    cd.type = internalOptions.getOption(InternalKeys.INTERNAL_PROVIDER_NAME,
                                 SliderProviderFactory.DEFAULT_CLUSTER_TYPE);
 
-    cd.dataPath = internalOptions.get(OptionKeys.INTERNAL_DATA_DIR_PATH);
+    cd.dataPath = internalOptions.get(InternalKeys.INTERNAL_DATA_DIR_PATH);
     cd.name = internalOptions.get(OptionKeys.APPLICATION_NAME);
-    cd.originConfigurationPath = internalOptions.get(OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH);
-    cd.generatedConfigurationPath = internalOptions.get(OptionKeys.INTERNAL_GENERATED_CONF_PATH);
-    cd.setImagePath(internalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH));
-    cd.setApplicationHome(internalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME));
+    cd.originConfigurationPath = internalOptions.get(InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH);
+    cd.generatedConfigurationPath = internalOptions.get(InternalKeys.INTERNAL_GENERATED_CONF_PATH);
+    cd.setImagePath(internalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH));
+    cd.setApplicationHome(internalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME));
     cd.setZkPath(appOptions.get(ZOOKEEPER_PATH));
     cd.setZkHosts(appOptions.get(ZOOKEEPER_QUORUM));
     
diff --git a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
new file mode 100644
index 0000000..ad384e2
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.api;
+
+/**
+ * Keys for internal use, go into `internal.json` and not intended for normal
+ * use except when tuning Slider AM operations
+ */
+public interface InternalKeys {
+
+
+  /**
+   * Home dir of the app: {@value}
+   * If set, implies there is a home dir to use
+   */
+  String INTERNAL_APPLICATION_HOME = "internal.application.home";
+  /**
+   * Path to an image file containing the app: {@value}
+   */
+  String INTERNAL_APPLICATION_IMAGE_PATH = "internal.application.image.path";
+  /**
+   * Time in milliseconds to wait after forking any in-AM 
+   * process before attempting to start up the containers: {@value}
+   * 
+   * A shorter value brings the cluster up faster, but means that if the
+   * in AM process fails (due to a bad configuration), then time
+   * is wasted starting containers on a cluster that isn't going to come
+   * up
+   */
+  String INTERNAL_CONTAINER_STARTUP_DELAY = "internal.container.startup.delay";
+  /**
+   * internal temp directory: {@value}
+   */
+  String INTERNAL_AM_TMP_DIR = "internal.tmp.dir";
+  /**
+   * where a snapshot of the original conf dir is: {@value}
+   */
+  String INTERNAL_SNAPSHOT_CONF_PATH = "internal.snapshot.conf.path";
+  /**
+   * where the generated configuration directory is: {@value}
+   */
+  String INTERNAL_GENERATED_CONF_PATH = "internal.generated.conf.path";
+  /**
+   * name of the provider creating/managing the application: {@value}
+   */
+  String INTERNAL_PROVIDER_NAME = "internal.provider.name";
+  /**
+   * path to the application instance's data directory: {@value}
+   */
+  String INTERNAL_DATA_DIR_PATH = "internal.data.dir.path";
+  /**
+   * Default time in milliseconds to wait after forking any in-AM
+   * process before attempting to start up the containers: {@value}
+   *
+   * A shorter value brings the cluster up faster, but means that if the
+   * in AM process fails (due to a bad configuration), then time
+   * is wasted starting containers on a cluster that isn't going to come
+   * up
+   */
+  int DEFAULT_INTERNAL_CONTAINER_STARTUP_DELAY = 5000;
+  /**
+   * Location of the keytab: {@value}
+   */
+  String KEYTAB_LOCATION = "internal.keytab.location";
+
+
+  /**
+   * Flag to indicate whether or not the chaos monkey is enabled:
+   * {@value}
+   */
+  String CHAOS_MONKEY_ENABLED = "internal.chaos.monkey.enabled";
+  boolean DEFAULT_CHAOS_MONKEY_ENABLED = false;
+
+
+  /**
+   * Interval between chaos monkey checks, built from day/hour/minute/second parts
+   */
+
+  String CHAOS_MONKEY_INTERVAL = "internal.chaos.monkey.interval";
+  String CHAOS_MONKEY_INTERVAL_DAYS = CHAOS_MONKEY_INTERVAL + ".days";
+  String CHAOS_MONKEY_INTERVAL_HOURS = CHAOS_MONKEY_INTERVAL + ".hours";
+  String CHAOS_MONKEY_INTERVAL_MINUTES = CHAOS_MONKEY_INTERVAL + ".minutes";
+  String CHAOS_MONKEY_INTERVAL_SECONDS = CHAOS_MONKEY_INTERVAL + ".seconds";
+  
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS = 0;
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS = 0;
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES = 0;
+
+  /**
+   * Prefix for all chaos monkey probabilities
+   */
+  String CHAOS_MONKEY_PROBABILITY =
+      "internal.chaos.monkey.probability";
+  /**
+   * Probabilities are out of 10000; 100 == 1%
+   */
+
+  /**
+   * Probability of a monkey check killing the AM:  {@value}
+   */
+  String CHAOS_MONKEY_PROBABILITY_AM_FAILURE = CHAOS_MONKEY_PROBABILITY +".amfailure";
+
+  /**
+   * Default probability of a monkey check killing the AM:  {@value}
+   */
+  int DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE = 0;
+
+  /**
+   * Probability of a monkey check killing a container:  {@value}
+   */
+
+  String CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE =
+      CHAOS_MONKEY_PROBABILITY + ".containerfailure";
+
+  /**
+   * Default probability of a monkey check killing a container:  {@value}
+   */
+  int DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE = 0;
+
+
+}
diff --git a/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java b/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
index 048fefa..a035a99 100644
--- a/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/OptionKeys.java
@@ -22,31 +22,9 @@
  *  Keys for entries in the <code>options</code> section
  *  of a cluster description.
  */
-public interface OptionKeys {
+public interface OptionKeys extends InternalKeys {
 
   /**
-   * Home dir of the app: {@value}
-   * If set, implies there is a home dir to use
-   */
-  String INTERNAL_APPLICATION_HOME = "internal.application.home";
-  
-  /**
-   * Path to an image file containing the app: {@value}
-   */
-  String INTERNAL_APPLICATION_IMAGE_PATH = "internal.application.image.path";
-
-  /**
-   * Time in milliseconds to wait after forking any in-AM 
-   * process before attempting to start up the containers: {@value}
-   * 
-   * A shorter value brings the cluster up faster, but means that if the
-   * in AM process fails (due to a bad configuration), then time
-   * is wasted starting containers on a cluster that isn't going to come
-   * up
-   */
-  String INTERNAL_CONTAINER_STARTUP_DELAY = "internal.container.startup.delay";
-  
-  /**
    * Time in milliseconds to wait after forking any in-AM 
    * process before attempting to start up the containers: {@value}
    * 
@@ -57,82 +35,15 @@
    */
   String APPLICATION_TYPE = "application.type";
   
-  /**
-   * Time in milliseconds to wait after forking any in-AM 
-   * process before attempting to start up the containers: {@value}
-   * 
-   * A shorter value brings the cluster up faster, but means that if the
-   * in AM process fails (due to a bad configuration), then time
-   * is wasted starting containers on a cluster that isn't going to come
-   * up
-   */
   String APPLICATION_NAME = "application.name";
 
   /**
-   * Time in milliseconds before a container is considered long-lived.
-   * Shortlived containers are interpreted as a problem with the role
-   * and/or the host: {@value}
-   */
-  String INTERNAL_CONTAINER_FAILURE_SHORTLIFE = "internal.container.failure.shortlife";
-
-  /**
-   * Default short life threshold: {@value}
-   */
-  int DEFAULT_CONTAINER_FAILURE_SHORTLIFE = 60;
-
-  /**
-   * maximum number of failed containers (in a single role)
-   * before the cluster is deemed to have failed {@value}
-   */
-  String INTERNAL_CONTAINER_FAILURE_THRESHOLD = "internal.container.failure.threshold";
-
-  /**
-   * Default failure threshold: {@value}
-   */
-  int DEFAULT_CONTAINER_FAILURE_THRESHOLD = 5;
-
-  /**
-   * delay for container startup:{@value}
-   */
-  int DEFAULT_CONTAINER_STARTUP_DELAY = 5000;
-
-  /**
-   * Version of the app: {@value}
-   */
-  String KEYTAB_LOCATION = "internal.keytab.location";
-
-  /**
    * Prefix for site.xml options: {@value}
    */
   String SITE_XML_PREFIX = "site.";
 
-  /**
-   * internal temp directory: {@value}
-   */
-  String INTERNAL_AM_TMP_DIR = "internal.tmp.dir";
 
   /**
-   * where a snapshot of the original conf dir is: {@value}
-   */
-  String INTERNAL_SNAPSHOT_CONF_PATH = "internal.snapshot.conf.path";
-  
-  /**
-   * where a snapshot of the original conf dir is: {@value}
-   */
-  String INTERNAL_GENERATED_CONF_PATH = "internal.generated.conf.path";
-    
-  /**
-   * where a snapshot of the original conf dir is: {@value}
-   */
-  String INTERNAL_PROVIDER_NAME = "internal.provider.name";
-  
-    
-  /**
-   * where a snapshot of the original conf dir is: {@value}
-   */
-  String INTERNAL_DATA_DIR_PATH = "internal.data.dir.path";
-  
-  /**
    * Zookeeper quorum host list: {@value}
    */
   String ZOOKEEPER_QUORUM = "zookeeper.quorum";
diff --git a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
index 1c914cb..3d54140 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
@@ -20,6 +20,25 @@
 
 /**
  * These are the keys valid in resource options
+ *
+
+
+ Container failure window.
+
+ The window is calculated in minutes as as (days * 24 *60 + hours* 24 + minutes)
+
+ Every interval of this period after the AM is started/restarted becomes
+ the time period in which the CONTAINER_FAILURE_THRESHOLD value is calculated.
+ 
+ After the window limit is reached, the failure counts are reset. This
+ is not a sliding window/moving average policy, simply a rule such as
+ "every six hours the failure count is reset"
+
+
+ <pre>
+ ===========================================================================
+ </pre>
+
  */
 public interface ResourceKeys {
 
@@ -69,4 +88,47 @@
    * placement policy
    */
   String COMPONENT_PLACEMENT_POLICY = "yarn.component.placement.policy";
+
+  
+
+  /**
+   * Time in seconds before a container is considered long-lived.
+   * Shortlived containers are interpreted as a problem with the role
+   * and/or the host: {@value}
+   */
+  String CONTAINER_FAILURE_SHORTLIFE =
+      "container.failure.shortlife";
+
+  /**
+   * Default short life threshold: {@value}
+   */
+  int DEFAULT_CONTAINER_FAILURE_SHORTLIFE = 60;
+
+  /**
+   * maximum number of failed containers (in a single role)
+   * before the cluster is deemed to have failed {@value}
+   */
+  String CONTAINER_FAILURE_THRESHOLD =
+      "yarn.container.failure.threshold";
+
+  /**
+   * prefix for the time of the container failure reset window.
+   * {@value}
+   */
+
+  String CONTAINER_FAILURE_WINDOW =
+      "yarn.container.failure.window";
+
+
+
+  int DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS = 0;
+  int DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS = 6;
+  int DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES = 0;
+
+
+  /**
+   * Default failure threshold: {@value}
+   */
+  int DEFAULT_CONTAINER_FAILURE_THRESHOLD = 5;
+
 }
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
index e762c1e..93f6207 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.SliderClusterProtocol;
@@ -182,7 +183,7 @@
     Configuration clientConf = SliderUtils.loadClientConfigurationResource();
     ConfigHelper.mergeConfigurations(conf, clientConf, CLIENT_RESOURCE);
     serviceArgs.applyDefinitions(conf);
-    serviceArgs.applyFileSystemURL(conf);
+    serviceArgs.applyFileSystemBinding(conf);
     // init security with our conf
     if (SliderUtils.isHadoopClusterSecure(conf)) {
       SliderUtils.forceLogin();
@@ -205,6 +206,8 @@
    * @return the exit code
    * @throws Throwable anything that went wrong
    */
+/* JDK7
+
   @Override
   public int runService() throws Throwable {
 
@@ -217,6 +220,9 @@
       case ACTION_BUILD:
         exitCode = actionBuild(clusterName, serviceArgs.getActionBuildArgs());
         break;
+      case ACTION_UPDATE:
+        exitCode = actionUpdate(clusterName, serviceArgs.getActionUpdateArgs());
+        break;
       case ACTION_CREATE:
         exitCode = actionCreate(clusterName, serviceArgs.getActionCreateArgs());
         break;
@@ -274,6 +280,66 @@
     return exitCode;
   }
 
+*/
+  @Override
+  public int runService() throws Throwable {
+
+    // choose the action
+    String action = serviceArgs.getAction();
+    int exitCode = EXIT_SUCCESS;
+    String clusterName = serviceArgs.getClusterName();
+    // actions
+    if (ACTION_BUILD.equals(action)) {
+      exitCode = actionBuild(clusterName, serviceArgs.getActionBuildArgs());
+    } else if (ACTION_CREATE.equals(action)) {
+      exitCode = actionCreate(clusterName, serviceArgs.getActionCreateArgs());
+    } else if (ACTION_FREEZE.equals(action)) {
+      exitCode = actionFreeze(clusterName,
+          serviceArgs.getActionFreezeArgs());
+    } else if (ACTION_THAW.equals(action)) {
+      exitCode = actionThaw(clusterName, serviceArgs.getActionThawArgs());
+    } else if (ACTION_DESTROY.equals(action)) {
+      exitCode = actionDestroy(clusterName);
+    } else if (ACTION_EXISTS.equals(action)) {
+      exitCode = actionExists(clusterName,
+          serviceArgs.getActionExistsArgs().live);
+    } else if (ACTION_FLEX.equals(action)) {
+      exitCode = actionFlex(clusterName, serviceArgs.getActionFlexArgs());
+    } else if (ACTION_GETCONF.equals(action)) {
+      exitCode = actionGetConf(clusterName, serviceArgs.getActionGetConfArgs());
+    } else if (ACTION_HELP.equals(action) ||
+               ACTION_USAGE.equals(action)) {
+      log.info(serviceArgs.usage());
+
+    } else if (ACTION_KILL_CONTAINER.equals(action)) {
+      exitCode = actionKillContainer(clusterName,
+          serviceArgs.getActionKillContainerArgs());
+
+    } else if (ACTION_AM_SUICIDE.equals(action)) {
+      exitCode = actionAmSuicide(clusterName,
+          serviceArgs.getActionAMSuicideArgs());
+
+    } else if (ACTION_LIST.equals(action)) {
+      exitCode = actionList(clusterName);
+    } else if (ACTION_REGISTRY.equals(action)) {
+      exitCode = actionRegistry(
+          serviceArgs.getActionRegistryArgs());
+    } else if (ACTION_STATUS.equals(action)) {
+      exitCode = actionStatus(clusterName,
+          serviceArgs.getActionStatusArgs());
+    } else if (ACTION_UPDATE.equals(action)) {
+      exitCode = actionUpdate(clusterName, serviceArgs.getActionUpdateArgs());
+
+    } else if (ACTION_VERSION.equals(action)) {
+
+      exitCode = actionVersion();
+    } else {
+      throw new SliderException(EXIT_UNIMPLEMENTED,
+          "Unimplemented: " + action);
+    }
+
+    return exitCode;
+  }
   /**
    * Delete the zookeeper node associated with the calling user and the cluster
    **/
@@ -327,9 +393,12 @@
           client.createPath(zkPath, "", ZooDefs.Ids.OPEN_ACL_UNSAFE,
                             CreateMode.PERSISTENT);
           return zkPath;
+          
+          //JDK7
+//        } catch (InterruptedException | KeeperException e) {
         } catch (InterruptedException e) {
           log.warn("Unable to create zk node {}", zkPath, e);
-        } catch (KeeperException e) {
+        } catch ( KeeperException e) {
           log.warn("Unable to create zk node {}", zkPath, e);
         }
       }
@@ -460,27 +529,42 @@
                                                YarnException,
                                                IOException {
 
-    buildInstanceDefinition(clustername, buildInfo);
+    buildInstanceDefinition(clustername, buildInfo, false, false);
     return EXIT_SUCCESS; 
   }
 
+  /**
+   * Update the cluster specification
+   *
+   * @param clustername cluster name
+   * @param buildInfo the arguments needed to update the cluster
+   * @throws YarnException Yarn problems
+   * @throws IOException other problems
+   */
+  public int actionUpdate(String clustername, AbstractClusterBuildingActionArgs buildInfo) throws
+      YarnException, IOException {
+    buildInstanceDefinition(clustername, buildInfo, true, true);
+    return EXIT_SUCCESS; 
+  }
 
   /**
    * Build up the AggregateConfiguration for an application instance then
    * persists it
    * @param clustername name of the cluster
    * @param buildInfo the arguments needed to build the cluster
+   * @param overwrite true if existing cluster directory can be overwritten
+   * @param liveClusterAllowed true if live cluster can be modified
    * @throws YarnException
    * @throws IOException
    */
   
   public void buildInstanceDefinition(String clustername,
-                                      AbstractClusterBuildingActionArgs buildInfo)
+      AbstractClusterBuildingActionArgs buildInfo, boolean overwrite, boolean liveClusterAllowed)
         throws YarnException, IOException {
     // verify that a live cluster isn't there
     SliderUtils.validateClusterName(clustername);
     verifyBindingsDefined();
-    verifyNoLiveClusters(clustername);
+    if (!liveClusterAllowed) verifyNoLiveClusters(clustername);
 
     Configuration conf = getConfig();
     String registryQuorum = lookupZKQuorum();
@@ -570,6 +654,7 @@
 
     // resource component args
     appConf.merge(cmdLineResourceOptions);
+    resources.merge(cmdLineResourceOptions);
     resources.mergeComponents(buildInfo.getResourceCompOptionMap());
 
     builder.init(providerName, instanceDefinition);
@@ -629,7 +714,7 @@
       throw e;
     }
     try {
-      builder.persist(appconfdir);
+      builder.persist(appconfdir, overwrite);
     } catch (LockAcquireFailedException e) {
       log.warn("Failed to get a Lock on {} : {}", builder, e);
       throw new BadClusterStateException("Failed to save " + clustername
@@ -737,9 +822,6 @@
     return instanceDefinition;
 
   }
-  
-  
-
 
   /**
    *
@@ -779,16 +861,16 @@
       instanceDefinition.getAppConfOperations();
     Path generatedConfDirPath =
       createPathThatMustExist(internalOptions.getMandatoryOption(
-        OptionKeys.INTERNAL_GENERATED_CONF_PATH));
+        InternalKeys.INTERNAL_GENERATED_CONF_PATH));
     Path snapshotConfPath =
       createPathThatMustExist(internalOptions.getMandatoryOption(
-        OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH));
+        InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH));
 
 
     // cluster Provider
     AbstractClientProvider provider = createClientProvider(
       internalOptions.getMandatoryOption(
-        OptionKeys.INTERNAL_PROVIDER_NAME));
+        InternalKeys.INTERNAL_PROVIDER_NAME));
     // make sure the conf dir is valid;
     
     // now build up the image path
@@ -817,8 +899,8 @@
     // set the application name;
     amLauncher.setKeepContainersOverRestarts(true);
 
-    amLauncher.setMaxAppAttempts(config.getInt(KEY_AM_RESTART_LIMIT,
-                                               DEFAULT_AM_RESTART_LIMIT));
+    int maxAppAttempts = config.getInt(KEY_AM_RESTART_LIMIT, 0);
+    amLauncher.setMaxAppAttempts(maxAppAttempts);
 
     sliderFileSystem.purgeAppInstanceTempFiles(clustername);
     Path tempPath = sliderFileSystem.createAppInstanceTempPath(
@@ -990,8 +1072,8 @@
       commandLine.add(Arguments.ARG_RM_ADDR, rmAddr);
     }
 
-    if (serviceArgs.getFilesystemURL() != null) {
-      commandLine.add(Arguments.ARG_FILESYSTEM, serviceArgs.getFilesystemURL());
+    if (serviceArgs.getFilesystemBinding() != null) {
+      commandLine.add(Arguments.ARG_FILESYSTEM, serviceArgs.getFilesystemBinding());
     }
     
     addConfOptionToCLI(commandLine, config, REGISTRY_PATH,
@@ -1366,7 +1448,7 @@
     verifyBindingsDefined();
     SliderUtils.validateClusterName(name);
     log.debug("actionFlex({})", name);
-    Map<String, Integer> roleInstances = new HashMap<>();
+    Map<String, Integer> roleInstances = new HashMap<String, Integer>();
     Map<String, String> roleMap = args.getComponentMap();
     for (Map.Entry<String, String> roleEntry : roleMap.entrySet()) {
       String key = roleEntry.getKey();
@@ -1661,7 +1743,12 @@
           return EXIT_FALSE;
         }
       }
-    } catch (YarnException | IOException e) {
+
+// JDK7    } catch (YarnException | IOException e) {
+    } catch (YarnException e) {
+      log.warn("Exception while waiting for the cluster {} to shut down: {}",
+               clustername, e);
+    } catch ( IOException e) {
       log.warn("Exception while waiting for the cluster {} to shut down: {}",
                clustername, e);
     }
@@ -1725,6 +1812,8 @@
     }
     try {
       String description = "Slider Application Instance " + clustername;
+// JDK7      
+/*
       switch (format) {
         case Arguments.FORMAT_XML:
           Configuration siteConf = getSiteConf(status, clustername);
@@ -1738,6 +1827,17 @@
         default:
           throw new BadCommandArgumentsException("Unknown format: " + format);
       }
+*/
+      if (Arguments.FORMAT_XML.equals(format)) {
+        Configuration siteConf = getSiteConf(status, clustername);
+        siteConf.writeXml(writer);
+      } else if (Arguments.FORMAT_PROPERTIES.equals(format)) {
+        Properties props = new Properties();
+        props.putAll(status.clientProperties);
+        props.store(writer, description);
+      } else {
+          throw new BadCommandArgumentsException("Unknown format: " + format);
+      }
     } finally {
       // data is written.
       // close the file
@@ -1841,10 +1941,8 @@
    * @throws IOException any problems loading -including a missing file
    */
   @VisibleForTesting
-  public AggregateConf loadPersistedClusterDescription(String clustername) throws
-                                                                           IOException,
-      SliderException,
-                                                                           LockAcquireFailedException {
+  public AggregateConf loadPersistedClusterDescription(String clustername)
+      throws IOException, SliderException, LockAcquireFailedException {
     Path clusterDirectory = sliderFileSystem.buildClusterDirPath(clustername);
     ConfPersister persister = new ConfPersister(sliderFileSystem, clusterDirectory);
     AggregateConf instanceDescription = new AggregateConf();
@@ -1921,7 +2019,7 @@
 
     if (uuids.length == 0) {
       // short cut on an empty list
-      return new LinkedList<>();
+      return new LinkedList<ClusterNode>();
     }
     return createClusterOperations().listClusterNodes(uuids);
   }
@@ -2120,7 +2218,7 @@
                                       + serviceType
                                       + " name " + name);
     }
-    List<ServiceInstanceData> sids = new ArrayList<>(size);
+    List<ServiceInstanceData> sids = new ArrayList<ServiceInstanceData>(size);
     for (CuratorServiceInstance<ServiceInstanceData> instance : instances) {
       ServiceInstanceData payload = instance.payload;
       logInstance(payload, registryArgs.verbose);
@@ -2305,7 +2403,10 @@
     try {
       maybeStartRegistry();
       return registry.instanceIDs(SliderKeys.APP_TYPE);
-    } catch (YarnException | IOException e) {
+/// JDK7    } catch (YarnException | IOException e) {
+    } catch (IOException e) {
+      throw e;
+    } catch (YarnException e) {
       throw e;
     } catch (Exception e) {
       throw new IOException(e);
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java b/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
index e7b492b..3151a09 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderYarnClientImpl.java
@@ -70,10 +70,10 @@
    */
   public List<ApplicationReport> listInstances(String user)
     throws YarnException, IOException {
-    Set<String> types = new HashSet<>(1);
+    Set<String> types = new HashSet<String>(1);
     types.add(SliderKeys.APP_TYPE);
     List<ApplicationReport> allApps = getApplications(types);
-    List<ApplicationReport> results = new ArrayList<>();
+    List<ApplicationReport> results = new ArrayList<ApplicationReport>();
     for (ApplicationReport report : allApps) {
       if (StringUtils.isEmpty(user) || user.equals(report.getUser())) {
         results.add(report);
@@ -97,7 +97,7 @@
                                                                   YarnException {
     List<ApplicationReport> instances = listInstances(user);
     List<ApplicationReport> results =
-      new ArrayList<>(instances.size());
+      new ArrayList<ApplicationReport>(instances.size());
     for (ApplicationReport report : instances) {
       if (report.getName().equals(appname)) {
         results.add(report);
@@ -240,7 +240,7 @@
                                                                       IOException {
     List<ApplicationReport> instances = listInstances(user);
     List<ApplicationReport> results =
-      new ArrayList<>(instances.size());
+      new ArrayList<ApplicationReport>(instances.size());
     for (ApplicationReport app : instances) {
       if (app.getName().equals(appname)
           && isApplicationLive(app)) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java b/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
index 79b77dc..b115d98 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderExitCodes.java
@@ -73,7 +73,7 @@
   int EXIT_PROCESS_FAILED = 72;
 
   /**
-   * The cluster failed -too many containers were
+   * The instance failed -too many containers were
    * failing or some other threshold was reached
    */
   int EXIT_DEPLOYMENT_FAILED = 73;
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java b/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
index 0ba562a..38f55c2 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderKeys.java
@@ -50,7 +50,7 @@
   /**
    * Application type for YARN  {@value}
    */
-  String APP_TYPE = "org.apache.slider";
+  String APP_TYPE = "org-apache-slider";
 
   /**
    * JVM arg to force IPv4  {@value}
@@ -167,4 +167,9 @@
   String PASSPHRASE = "DEV";
   String PASS_LEN = "50";
   String KEYSTORE_LOCATION = "ssl.server.keystore.location";
+
+  /**
+   * Python specific
+   */
+  String PYTHONPATH = "PYTHONPATH";
 }
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java b/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
index 6d3fe38..e31cfb6 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderXMLConfKeysForTesting.java
@@ -71,4 +71,9 @@
    */
   String KEY_TEST_YARN_RAM_REQUEST = "slider.test.yarn.ram";
   String DEFAULT_YARN_RAM_REQUEST = "192";
+
+  /**
+   * security related keys
+   */
+  String TEST_SECURITY_DIR = "/tmp/work/security";
 }
diff --git a/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java b/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
index 3f16f25..1bbe9ae 100644
--- a/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/common/SliderXmlConfKeys.java
@@ -84,19 +84,6 @@
   String KEY_AM_RESTART_LIMIT = "slider.yarn.restart.limit";
 
   /**
-   * Default Limit on restarts for the AM
-   * {@value}
-   */
-  int DEFAULT_AM_RESTART_LIMIT = 2;
-
-  /**
-   * Flag which is set to indicate that security should be enabled
-   * when talking to this cluster.
-   */
-  String KEY_SECURITY =
-      CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
-
-  /**
    * queue name
    */
   String KEY_YARN_QUEUE = "slider.yarn.queue";
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
index 44bc239..f4a4569 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/AbstractActionArgs.java
@@ -25,7 +25,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -41,25 +40,22 @@
   }
 
   /**
-   * URI of the filesystem
+   * URI/binding to the filesystem
    */
   @Parameter(names = {ARG_FILESYSTEM, ARG_FILESYSTEM_LONG},
-             description = "Filesystem URI",
-             converter = URIArgumentConverter.class)
-  public URI filesystemURL;
+             description = "Filesystem Binding")
+  public String filesystemBinding;
 
   @Parameter(names = {ARG_BASE_PATH},
              description = "Slider base path on the filesystem",
              converter =  PathArgumentConverter.class)
   public Path basePath;
 
-
   /**
    * This is the default parameter
    */
   @Parameter
-  public final List<String> parameters = new ArrayList<>();
-
+  public final List<String> parameters = new ArrayList<String>();
 
   /**
    * get the name: relies on arg 1 being the cluster name in all operations 
@@ -80,7 +76,7 @@
    */
 
   @Parameter(names = ARG_DEFINE, arity = 1, description = "Definitions")
-  public final List<String> definitions = new ArrayList<>();
+  public final List<String> definitions = new ArrayList<String>();
 
   /**
    * System properties
@@ -88,11 +84,11 @@
   @Parameter(names = {ARG_SYSPROP}, arity = 1,
              description = "system properties in the form name value" +
                            " These are set after the JVM is started.")
-  public final List<String> sysprops = new ArrayList<>(0);
+  public final List<String> sysprops = new ArrayList<String>(0);
 
 
   @Parameter(names = {ARG_MANAGER_SHORT, ARG_MANAGER},
-             description = "hostname:port of the YARN resource manager")
+             description = "Binding (usually hostname:port) of the YARN resource manager")
   public String manager;
 
 
@@ -110,7 +106,7 @@
 
   /**
    * Get the name of the action
-   * @return
+   * @return the action name
    */
   public abstract String getActionName() ;
 
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
similarity index 68%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
index e3e595f..9d76bd8 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ActionUpdateArgs.java
@@ -16,16 +16,17 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.common.params;
 
-public class AbstractRMOperation {
+import com.beust.jcommander.Parameters;
 
-  /**
-   * Execute the operation
-   * @param asyncRMClient client
-   */
-  public void execute(RMOperationHandler handler) {
+@Parameters(commandNames = {SliderActions.ACTION_UPDATE},
+            commandDescription = SliderActions.DESCRIBE_ACTION_UPDATE)
 
+public class ActionUpdateArgs extends AbstractClusterBuildingActionArgs {
+
+  @Override
+  public String getActionName() {
+    return SliderActions.ACTION_UPDATE;
   }
-  
 }
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java b/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
index 248e4c2..1f07de3 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/AppAndResouceOptionArgsDelegate.java
@@ -37,7 +37,7 @@
   @Parameter(names = {ARG_OPTION, ARG_OPTION_SHORT}, arity = 2,
              description = ARG_OPTION + "<name> <value>",
              splitter = DontSplitArguments.class)
-  public List<String> optionTuples = new ArrayList<>(0);
+  public List<String> optionTuples = new ArrayList<String>(0);
 
 
   /**
@@ -47,7 +47,7 @@
              description = "Component option " + ARG_COMP_OPT +
                            " <component> <name> <option>",
              splitter = DontSplitArguments.class)
-  public List<String> compOptTriples = new ArrayList<>(0);
+  public List<String> compOptTriples = new ArrayList<String>(0);
 
   /**
    * Resource Options
@@ -55,7 +55,7 @@
   @Parameter(names = {ARG_RESOURCE_OPT, ARG_RESOURCE_OPT_SHORT}, arity = 2,
              description = "Resource option "+ ARG_RESOURCE_OPT + "<name> <value>",
              splitter = DontSplitArguments.class)
-  public List<String> resOptionTuples = new ArrayList<>(0);
+  public List<String> resOptionTuples = new ArrayList<String>(0);
 
 
   /**
@@ -65,7 +65,7 @@
              description = "Component resource option " + ARG_RES_COMP_OPT +
                            " <component> <name> <option>",
              splitter = DontSplitArguments.class)
-  public List<String> resCompOptTriples = new ArrayList<>(0);
+  public List<String> resCompOptTriples = new ArrayList<String>(0);
 
 
   public Map<String, String> getOptionsMap() throws
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java b/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
index 0837dd2..83754b3 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ArgOps.java
@@ -44,7 +44,7 @@
    * create a 3-tuple
    */
   public static List<Object> triple(String msg, int min, int max) {
-    List<Object> l = new ArrayList<>(3);
+    List<Object> l = new ArrayList<Object>(3);
     l.add(msg);
     l.add(min);
     l.add(max);
@@ -58,11 +58,12 @@
     return triple(msg, min, min);
   }
 
-  public static void applyFileSystemURL(URI filesystemURL, Configuration conf) {
-    if (filesystemURL != null) {
+  public static void applyFileSystemBinding(String filesystemBinding,
+      Configuration conf) {
+    if (filesystemBinding != null) {
       //filesystem argument was set -this overwrites any defaults in the
       //configuration
-      FileSystem.setDefaultUri(conf, filesystemURL);
+      FileSystem.setDefaultUri(conf, filesystemBinding);
     }
   }
 
@@ -99,7 +100,7 @@
   public static Map<String, String> convertTupleListToMap(String description,
                                                           List<String> list) throws
                                                                              BadCommandArgumentsException {
-    Map<String, String> results = new HashMap<>();
+    Map<String, String> results = new HashMap<String, String>();
     if (list != null && !list.isEmpty()) {
       int size = list.size();
       if (size % 2 != 0) {
@@ -133,7 +134,7 @@
                                                                          List<String> list) throws
                                                                                             BadCommandArgumentsException {
     Map<String, Map<String, String>> results =
-      new HashMap<>();
+      new HashMap<String, Map<String, String>>();
     if (list != null && !list.isEmpty()) {
       int size = list.size();
       if (size % 3 != 0) {
@@ -148,7 +149,7 @@
         Map<String, String> roleMap = results.get(role);
         if (roleMap == null) {
           //demand create new role map
-          roleMap = new HashMap<>();
+          roleMap = new HashMap<String, String>();
           results.put(role, roleMap);
         }
         if (roleMap.get(key) != null) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
index 44a2a7a..ca854f1 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/ClientArgs.java
@@ -46,6 +46,7 @@
   private AbstractClusterBuildingActionArgs buildingActionArgs;
   private final ActionAMSuicideArgs actionAMSuicideArgs = new ActionAMSuicideArgs();
   private final ActionBuildArgs actionBuildArgs = new ActionBuildArgs();
+  private final ActionUpdateArgs actionUpdateArgs = new ActionUpdateArgs();
   private final ActionCreateArgs actionCreateArgs = new ActionCreateArgs();
   private final ActionDestroyArgs actionDestroyArgs = new ActionDestroyArgs();
   private final ActionExistsArgs actionExistsArgs = new ActionExistsArgs();
@@ -77,6 +78,7 @@
       actionAMSuicideArgs,
       actionBuildArgs,
       actionCreateArgs,
+      actionUpdateArgs,
       actionDestroyArgs,
       actionExistsArgs,
       actionFlexArgs,
@@ -101,9 +103,10 @@
       log.debug("Setting RM to {}", getManager());
       conf.set(YarnConfiguration.RM_ADDRESS, getManager());
     }
-    if ( getBasePath() != null ) {
+    if (getBasePath() != null) {
       log.debug("Setting basePath to {}", getBasePath());
-      conf.set(SliderXmlConfKeys.KEY_SLIDER_BASE_PATH, getBasePath().toString());
+      conf.set(SliderXmlConfKeys.KEY_SLIDER_BASE_PATH,
+          getBasePath().toString());
     }
   }
 
@@ -119,6 +122,10 @@
     return actionBuildArgs;
   }
 
+  public ActionUpdateArgs getActionUpdateArgs() {
+    return actionUpdateArgs;
+  }
+
   public ActionCreateArgs getActionCreateArgs() {
     return actionCreateArgs;
   }
@@ -182,6 +189,9 @@
       //its a builder, so set those actions too
       buildingActionArgs = actionCreateArgs;
 
+    } else if (SliderActions.ACTION_UPDATE.equals(action)) {
+      bindCoreAction(actionUpdateArgs);
+
     } else if (SliderActions.ACTION_FREEZE.equals(action)) {
       bindCoreAction(actionFreezeArgs);
 
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
index 4cee1d1..5d94182 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/CommonArgs.java
@@ -30,7 +30,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.net.URI;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
@@ -63,11 +62,11 @@
 
    */
 
-  public Map<String, String> definitionMap = new HashMap<>();
+  public Map<String, String> definitionMap = new HashMap<String, String>();
   /**
    * System properties
    */
-  public Map<String, String> syspropsMap = new HashMap<>();
+  public Map<String, String> syspropsMap = new HashMap<String, String>();
 
 
   /**
@@ -215,12 +214,12 @@
 
 
   /**
-   * If the Filesystem URL was provided, it overrides anything in
+   * If the Filesystem binding was provided, it overrides anything in
    * the configuration
    * @param conf configuration
    */
-  public void applyFileSystemURL(Configuration conf) {
-    ArgOps.applyFileSystemURL(getFilesystemURL(), conf);
+  public void applyFileSystemBinding(Configuration conf) {
+    ArgOps.applyFileSystemBinding(getFilesystemBinding(), conf);
   }
 
   public boolean isDebug() {
@@ -228,8 +227,8 @@
   }
 
 
-  public URI getFilesystemURL() {
-    return coreAction.filesystemURL;
+  public String getFilesystemBinding() {
+    return coreAction.filesystemBinding;
   }
 
   public Path getBasePath() { return coreAction.basePath; }
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java b/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
index 0344305..3225133 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/DontSplitArguments.java
@@ -27,7 +27,7 @@
 
   @Override
   public List<String> split(String value) {
-    List<String> list = new ArrayList<>(1);
+    List<String> list = new ArrayList<String>(1);
     list.add(value);
     return list;
   }
diff --git a/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java b/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
index 2219a25..8e50a83 100644
--- a/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
+++ b/slider-core/src/main/java/org/apache/slider/common/params/SliderActions.java
@@ -27,6 +27,7 @@
   String ACTION_AM_SUICIDE = "am-suicide";
   String ACTION_BUILD = "build";
   String ACTION_CREATE = "create";
+  String ACTION_UPDATE = "update";
   String ACTION_DESTROY = "destroy";
   String ACTION_ECHO = "echo";
   String ACTION_EXISTS = "exists";
@@ -49,6 +50,8 @@
     "Build a Slider cluster specification -but do not start it";
   String DESCRIBE_ACTION_CREATE =
       "Create a live Slider application";
+  String DESCRIBE_ACTION_UPDATE =
+      "Update template for a Slider application";
   String DESCRIBE_ACTION_DESTROY =
         "Destroy a frozen Slider application)";
   String DESCRIBE_ACTION_EXISTS =
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java b/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java
new file mode 100644
index 0000000..0ccca0f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/Comparators.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.common.tools;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+public class Comparators {
+
+  public static class LongComparator implements Comparator<Long>, Serializable {
+    @Override
+    public int compare(Long o1, Long o2) {
+      long result = o1 - o2;
+      // needed when the difference between the two values exceeds integer range
+      if (result < 0 ) {
+        return -1;
+      } else if (result >0) {
+        return 1;
+      }
+      return 0;
+    }
+  }
+
+  /**
+   * Little template class to reverse any comparator
+   * @param <CompareType> the type that is being compared
+   */
+  public static class ComparatorReverser<CompareType> implements Comparator<CompareType>,
+      Serializable {
+
+    final Comparator<CompareType> instance;
+
+    public ComparatorReverser(Comparator<CompareType> instance) {
+      this.instance = instance;
+    }
+
+    @Override
+    public int compare(CompareType first, CompareType second) {
+      return instance.compare(second, first);
+    }
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java b/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
index 2f0b9ac..b7e1323 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/ConfigHelper.java
@@ -81,7 +81,7 @@
 
    */
   public static TreeSet<String> sortedConfigKeys(Iterable<Map.Entry<String, String>> conf) {
-    TreeSet<String> sorted = new TreeSet<>();
+    TreeSet<String> sorted = new TreeSet<String>();
     for (Map.Entry<String, String> entry : conf) {
       sorted.add(entry.getKey());
     }
@@ -182,24 +182,53 @@
   public Document parseConfiguration(FileSystem fs,
                                      Path path) throws
                                                 IOException {
-    int len = (int) fs.getLength(path);
-    byte[] data = new byte[len];
-    try(FSDataInputStream in = fs.open(path)) {
-      in.readFully(0, data);
-    }
 
+
+    byte[] data = loadBytes(fs, path);
     //this is here to track down a parse issue
     //related to configurations
-    String s = new String(data, 0, len);
+    String s = new String(data, 0, data.length);
     log.debug("XML resource {} is \"{}\"", path, s);
+/* JDK7
     try (ByteArrayInputStream in = new ByteArrayInputStream(data)) {
       Document document = parseConfigXML(in);
       return document;
     } catch (ParserConfigurationException | SAXException e) {
       throw new IOException(e);
     }
+*/
+    ByteArrayInputStream in= null;
+    try {
+      in = new ByteArrayInputStream(data);
+      Document document = parseConfigXML(in);
+      return document;
+    } catch (ParserConfigurationException e) {
+      throw new IOException(e);
+    } catch (SAXException e) {
+      throw new IOException(e);
+    } finally {
+      IOUtils.closeStream(in);
+    }
   }
-  
+
+  public static byte[] loadBytes(FileSystem fs, Path path) throws IOException {
+    int len = (int) fs.getLength(path);
+    byte[] data = new byte[len];
+    /* JDK7
+    try(FSDataInputStream in = fs.open(path)) {
+      in.readFully(0, data);
+    }
+*/
+    FSDataInputStream in = null;
+    in = fs.open(path);
+    try {
+      in.readFully(0, data);
+    } finally {
+      IOUtils.closeStream(in);
+    }
+    return data;
+  }
+
   /**
    * Load a configuration from ANY FS path. The normal Configuration
    * loader only works with file:// URIs
@@ -209,13 +238,9 @@
    * @throws IOException
    */
   public static Configuration loadConfiguration(FileSystem fs,
-                                                Path path) throws
-                                                                   IOException {
-    int len = (int) fs.getLength(path);
-    byte[] data = new byte[len];
-    try (FSDataInputStream in = fs.open(path)) {
-      in.readFully(0, data);
-    }
+                                                Path path) throws IOException {
+    byte[] data = loadBytes(fs, path);
+
     ByteArrayInputStream in2;
 
     in2 = new ByteArrayInputStream(data);
@@ -510,7 +535,7 @@
    * @return hash map
    */
   public static Map<String, String> buildMapFromConfiguration(Configuration conf) {
-    Map<String, String> map = new HashMap<>();
+    Map<String, String> map = new HashMap<String, String>();
     return SliderUtils.mergeEntries(map, conf);
   }
 
@@ -523,7 +548,8 @@
    * @param valuesource the source of values
    * @return a new configuration where <code>foreach key in keysource, get(key)==valuesource.get(key)</code>
    */
-  public static Configuration resolveConfiguration(Iterable<Map.Entry<String, String>> keysource,
+  public static Configuration resolveConfiguration(
+      Iterable<Map.Entry<String, String>> keysource,
       Configuration valuesource) {
     Configuration result = new Configuration(false);
     for (Map.Entry<String, String> entry : keysource) {
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java b/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
index 714322c..def252a 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/CoreFileSystem.java
@@ -50,6 +50,9 @@
 import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
 
 import static org.apache.slider.common.SliderXmlConfKeys.CLUSTER_DIRECTORY_PERMISSIONS;
 import static org.apache.slider.common.SliderXmlConfKeys.DEFAULT_CLUSTER_DIRECTORY_PERMISSIONS;
@@ -242,6 +245,7 @@
           IOException,
       SliderException {
     if (fileSystem.exists(clusterDirectory)) {
+      
       log.error("Dir {} exists: {}",
                 clusterDirectory,
                 listFSDir(clusterDirectory));
@@ -303,6 +307,37 @@
   }
 
   /**
+   * Verify that a file exists in the zip file given by path
+   * @param path path to zip file
+   * @param file file expected to be in zip
+   * @throws FileNotFoundException file not found or is not a zip file
+   * @throws IOException  trouble with FS
+   */
+  public void verifyFileExistsInZip(Path path, String file) throws IOException {
+    fileSystem.copyToLocalFile(path, new Path("/tmp"));
+    File dst = new File((new Path("/tmp", path.getName())).toString());
+    Enumeration<? extends ZipEntry> entries;
+    ZipFile zipFile = new ZipFile(dst);
+    boolean found = false;
+
+    try {
+      entries = zipFile.entries();
+      while (entries.hasMoreElements()) {
+        ZipEntry entry = entries.nextElement();
+        String nm = entry.getName();
+        if (nm.endsWith(file)) {
+          found = true;
+          break;
+        }
+      }
+    } finally {
+      zipFile.close();
+    }
+    dst.delete();
+    if (!found) throw new FileNotFoundException("file: " + file + " not found in " + path);
+    log.info("Verification of " + path + " passed");
+  }
+  /**
    * Create the application-instance specific temporary directory
    * in the DFS
    *
@@ -407,7 +442,7 @@
     //copied to the destination
     FileStatus[] fileset = fileSystem.listStatus(srcDir);
     Map<String, LocalResource> localResources =
-            new HashMap<>(fileset.length);
+            new HashMap<String, LocalResource>(fileset.length);
     for (FileStatus entry : fileset) {
 
       LocalResource resource = createAmResource(entry.getPath(),
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
index 17f8b70..188b7d9 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.common.tools;
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.io.output.ByteArrayOutputStream;
@@ -42,7 +43,7 @@
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.RoleKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.common.SliderXmlConfKeys;
@@ -439,7 +440,7 @@
    * @return a stringified list
    */
   public static List<String> collectionToStringList(Collection c) {
-    List<String> l = new ArrayList<>(c.size());
+    List<String> l = new ArrayList<String>(c.size());
     for (Object o : c) {
       l.add(o.toString());
     }
@@ -467,13 +468,19 @@
    */
   public static String join(Collection collection, String separator, boolean trailing) {
     StringBuilder b = new StringBuilder();
+    // fast return on empty collection
+    if (collection.isEmpty()) {
+      return trailing ? separator : "";
+    }
     for (Object o : collection) {
       b.append(o);
       b.append(separator);
     }
-    return trailing? 
-           b.toString()
-           : (b.substring(0, b.length() - separator.length()));
+    int length = separator.length();
+    String s = b.toString();
+    return (trailing || s.isEmpty())?
+           s
+           : (b.substring(0, b.length() - length));
   }
 
   /**
@@ -610,6 +617,8 @@
    */
   public static <T1, T2> Map<T1, T2> mergeMapsIgnoreDuplicateKeys(Map<T1, T2> first,
                                                                   Map<T1, T2> second) {
+    Preconditions.checkArgument(first != null, "Null 'first' value");
+    Preconditions.checkArgument(second != null, "Null 'second' value");
     for (Map.Entry<T1, T2> entry : second.entrySet()) {
       T1 key = entry.getKey();
       if (!first.containsKey(key)) {
@@ -830,7 +839,7 @@
    * @return a possibly empty map of environment variables.
    */
   public static Map<String, String> buildEnvMap(Map<String, String> roleOpts) {
-    Map<String, String> env = new HashMap<>();
+    Map<String, String> env = new HashMap<String, String>();
     if (roleOpts != null) {
       for (Map.Entry<String, String> entry: roleOpts.entrySet()) {
         String key = entry.getKey();
@@ -857,7 +866,7 @@
       Map<String, String> optionMap = entry.getValue();
       Map<String, String> existingMap = clusterRoleMap.get(key);
       if (existingMap == null) {
-        existingMap = new HashMap<>();
+        existingMap = new HashMap<String, String>();
       }
       log.debug("Overwriting role options with command line values {}",
                 stringifyMap(optionMap));
@@ -1022,7 +1031,7 @@
   }
 
     public static Map<String, Map<String, String>> deepClone(Map<String, Map<String, String>> src) {
-    Map<String, Map<String, String>> dest = new HashMap<>();
+    Map<String, Map<String, String>> dest = new HashMap<String, Map<String, String>>();
     for (Map.Entry<String, Map<String, String>> entry : src.entrySet()) {
       dest.put(entry.getKey(), stringMapClone(entry.getValue()));
     }
@@ -1030,7 +1039,7 @@
   }
 
   public static Map<String, String> stringMapClone(Map<String, String> src) {
-    Map<String, String> dest =  new HashMap<>();
+    Map<String, String> dest =  new HashMap<String, String>();
     return mergeEntries(dest, src.entrySet());
   }
 
@@ -1073,7 +1082,7 @@
       UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
       return currentUser;
     } catch (IOException e) {
-      log.info("Failed to grt user info", e);
+      log.info("Failed to get user info", e);
       throw e;
     }
   }
@@ -1294,8 +1303,8 @@
       SliderException, IOException {
     Path imagePath;
     String imagePathOption =
-        internalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
-    String appHomeOption = internalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME);
+        internalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+    String appHomeOption = internalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME);
     if (!isUnset(imagePathOption)) {
       imagePath = fs.createPathThatMustExist(imagePathOption);
     } else {
@@ -1357,9 +1366,9 @@
 
   /**
    * Append a list of paths, inserting "/" signs as appropriate
-   * @param base
-   * @param paths
-   * @return
+   * @param base base path/URL
+   * @param paths subpaths
+   * @return base+"/"+paths[0]+"/"+paths[1]...
    */
   public static String appendToURL(String base, String...paths) {
     String result = base;
@@ -1415,6 +1424,21 @@
   }
 
   /**
+   * A compareTo function that converts the result of a long
+   * comparison into the integer that <code>Comparable</code>
+   * expects.
+   * @param left left side
+   * @param right right side
+   * @return -1, 0, 1 depending on the diff
+   */
+  public static int compareTo(long left, long right) {
+    long diff = left - right;
+    if (diff < 0) return -1;
+    if (diff > 0) return 1;
+    return 0;
+  }
+  
+  /**
    * This wrapps ApplicationReports and generates a string version
    * iff the toString() operator is invoked
    */
@@ -1452,7 +1476,7 @@
           }
           is = new ByteArrayInputStream(content);
         } else {
-          log.info("Size unknown. Reading {}", zipEntry.getName());
+          log.debug("Size unknown. Reading {}", zipEntry.getName());
           ByteArrayOutputStream baos = new ByteArrayOutputStream();
           while (true) {
             int byteRead = zis.read();
diff --git a/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java b/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
index 0580013..937b777 100644
--- a/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/build/InstanceBuilder.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.StatusKeys;
 import org.apache.slider.common.SliderXmlConfKeys;
@@ -139,7 +140,7 @@
                     instancePaths.dataPath.toUri());
 
 
-    internalOps.set(OptionKeys.INTERNAL_PROVIDER_NAME, provider);
+    internalOps.set(InternalKeys.INTERNAL_PROVIDER_NAME, provider);
     internalOps.set(OptionKeys.APPLICATION_NAME, clustername);
 
   }
@@ -224,16 +225,18 @@
 
   /**
    * Persist this
+   * @param appconfdir conf dir
+   * @param overwrite if true, we don't need to create cluster dir
    * @throws IOException
    * @throws SliderException
    * @throws LockAcquireFailedException
    * @param appconfdir dir to persist the conf to
    */
-  public void persist(Path appconfdir) throws
+  public void persist(Path appconfdir, boolean overwrite) throws
       IOException,
       SliderException,
       LockAcquireFailedException {
-    coreFS.createClusterDirectories(instancePaths);
+    if (!overwrite) coreFS.createClusterDirectories(instancePaths);
     ConfPersister persister =
       new ConfPersister(coreFS, getInstanceDir());
     ConfDirSnapshotAction action = null;
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
index 2cbfd54..5517771 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTree.java
@@ -55,14 +55,14 @@
   /**
    * Metadata
    */
-  public Map<String, Object> metadata = new HashMap<>(INITAL_MAP_CAPACITY);
+  public Map<String, Object> metadata = new HashMap<String, Object>(INITAL_MAP_CAPACITY);
 
 
   /**
    * Global options
    */
   public Map<String, String> global =
-    new HashMap<>(INITAL_MAP_CAPACITY);
+    new HashMap<String, String>(INITAL_MAP_CAPACITY);
 
 
   /**
@@ -70,7 +70,7 @@
    * role -> option -> value
    */
   public Map<String, Map<String, String>> components =
-    new HashMap<>(INITAL_MAP_CAPACITY);
+    new HashMap<String, Map<String, String>>(INITAL_MAP_CAPACITY);
 
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
index 1cb537a..bb17547 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/ConfTreeOperations.java
@@ -148,7 +148,7 @@
       return operations;
     }
     //create a new instances
-    Map<String, String> map = new HashMap<>();
+    Map<String, String> map = new HashMap<String, String>();
     confTree.components.put(name, map);
     return new MapOperations(name, map);
   }
@@ -159,7 +159,7 @@
    */
   @JsonIgnore
   public Set<String> getComponentNames() {
-    return new HashSet<>(confTree.components.keySet());
+    return new HashSet<String>(confTree.components.keySet());
   }
   
   
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
index bb57b94..4b1b44f 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.core.conf;
 
+import com.google.common.base.Preconditions;
 import org.apache.slider.common.tools.SliderUtils;
 import org.apache.slider.core.exceptions.BadConfigException;
 import org.slf4j.Logger;
@@ -46,7 +47,7 @@
   public final String name;
 
   public MapOperations() {
-    options = new HashMap<>();
+    options = new HashMap<String, String>();
     name = "";
   }
 
@@ -61,12 +62,11 @@
     this.name = name;
   }
 
-
   /**
-   * Get a cluster option or value
+   * Get an option value
    *
-   * @param key
-   * @param defVal
+   * @param key key
+   * @param defVal default value
    * @return option in map or the default
    */
   public String getOption(String key, String defVal) {
@@ -74,14 +74,27 @@
     return val != null ? val : defVal;
   }
 
+  /**
+   * Get a boolean option
+   *
+   * @param key option key
+   * @param defVal default value
+   * @return option true if the option equals "true", or the default value
+   * if the option was not defined at all.
+   */
+  public Boolean getOptionBool(String key, boolean defVal) {
+    String val = getOption(key, Boolean.toString(defVal));
+    return Boolean.valueOf(val);
+  }
 
   /**
    * Get a cluster option or value
    *
-   * @param key
+   * @param key option key
    * @return the value
    * @throws BadConfigException if the option is missing
    */
+
   public String getMandatoryOption(String key) throws BadConfigException {
     String val = options.get(key);
     if (val == null) {
@@ -247,4 +260,35 @@
     }
     return builder.toString();
   }
+
+  /**
+   * Get the time range of a set of keys
+   * @param basekey base key whose .days/.hours/.minutes/.seconds suffixes are read
+   * @param defDays default number of days
+   * @param defHours default number of hours
+   * @param defMins default number of minutes
+   * @param defSecs default number of seconds
+   * @return the total time range in seconds
+   */
+  public long getTimeRange(String basekey,
+      int defDays,
+      int defHours,
+      int defMins,
+      int defSecs) {
+    Preconditions.checkArgument(basekey != null);
+    int days = getOptionInt(basekey + ".days", defDays);
+    int hours = getOptionInt(basekey + ".hours", defHours);
+
+    int minutes = getOptionInt(basekey + ".minutes", defMins);
+    int seconds = getOptionInt(basekey + ".seconds", defSecs);
+    // range check
+    Preconditions.checkState(days >= 0 && hours >= 0 && minutes >= 0
+                             && seconds >= 0,
+        "Time range for %s has negative time component %s:%s:%s:%s",
+        basekey, days, hours, minutes, seconds);
+
+    // convert to total minutes, then to seconds (hours contribute 60 minutes each)
+    long totalMinutes = days * 24 * 60 + hours * 60 + minutes;
+    return totalMinutes * 60 + seconds;
+  }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java b/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
index c949c1c..894f19b 100644
--- a/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
+++ b/slider-core/src/main/java/org/apache/slider/core/exceptions/ErrorStrings.java
@@ -20,7 +20,7 @@
 
 public interface ErrorStrings {
   String E_UNSTABLE_CLUSTER = "Unstable Application Instance :";
-  String E_CLUSTER_RUNNING = "Application Instance  lready running";
+  String E_CLUSTER_RUNNING = "Application Instance already running";
   String E_ALREADY_EXISTS = "already exists";
   String PRINTF_E_INSTANCE_ALREADY_EXISTS = "Application Instance \"%s\" already exists and is defined in %s";
   String PRINTF_E_INSTANCE_DIR_ALREADY_EXISTS = "Application Instance dir already exists: %s";
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java b/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
index d8c3522..644f627 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/AbstractLauncher.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.core.launch;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
@@ -57,15 +58,15 @@
   /**
    * Env vars; set up at final launch stage
    */
-  protected final Map<String, String> envVars = new HashMap<>();
+  protected final Map<String, String> envVars = new HashMap<String, String>();
   protected final MapOperations env = new MapOperations("env", envVars);
   protected final ContainerLaunchContext containerLaunchContext =
     Records.newRecord(ContainerLaunchContext.class);
-  protected final List<String> commands = new ArrayList<>(20);
+  protected final List<String> commands = new ArrayList<String>(20);
   protected final Map<String, LocalResource> localResources =
-    new HashMap<>();
+    new HashMap<String, LocalResource>();
   private final Map<String, ByteBuffer> serviceData =
-    new HashMap<>();
+    new HashMap<String, ByteBuffer>();
   // security
   Credentials credentials = new Credentials();
 
@@ -238,6 +239,8 @@
     setEnv("CLASSPATH", classpath.buildClasspath());
   }
   public void setEnv(String var, String value) {
+    Preconditions.checkArgument(var != null, "null variable name");
+    Preconditions.checkArgument(value != null, "null value");
     env.put(var, value);
   }
 
@@ -266,7 +269,7 @@
 
   public String[] dumpEnvToString() {
 
-    List<String> nodeEnv = new ArrayList<>();
+    List<String> nodeEnv = new ArrayList<String>();
 
     for (Map.Entry<String, String> entry : env.entrySet()) {
       String envElt = String.format("%s=\"%s\"",
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java b/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
index dc4a886..bd8a0a5 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/AppMasterLauncher.java
@@ -50,7 +50,7 @@
   private final ApplicationSubmissionContext submissionContext;
   private final ApplicationId appId;
   private final boolean secureCluster;
-  private int maxAppAttempts = 2;
+  private int maxAppAttempts = 0;
   private boolean keepContainersOverRestarts = true;
   private String queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
   private int priority = 1;
@@ -174,7 +174,10 @@
       submissionContext.setKeepContainersAcrossApplicationAttempts(true);
     }
 
-    submissionContext.setMaxAppAttempts(maxAppAttempts);
+    if (maxAppAttempts > 0) {
+      log.debug("Setting max AM attempts to {}", maxAppAttempts);
+      submissionContext.setMaxAppAttempts(maxAppAttempts);
+    }
 
     if (secureCluster) {
       addSecurityTokens();
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java b/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
index 3527149..8e49435 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/ClasspathConstructor.java
@@ -39,7 +39,7 @@
 
     public static final String CLASS_PATH_SEPARATOR = ApplicationConstants.CLASS_PATH_SEPARATOR;
 //  public static final String CLASS_PATH_SEPARATOR = File.pathSeparator;
-  private final List<String> pathElements = new ArrayList<>();
+  private final List<String> pathElements = new ArrayList<String>();
 
   public ClasspathConstructor() {
   }
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java b/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
index dbaa981..f50bb48 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/CommandLineBuilder.java
@@ -30,7 +30,7 @@
  * Special support for JVM command buildup.
  */
 public class CommandLineBuilder {
-  protected final List<String> argumentList = new ArrayList<>(20);
+  protected final List<String> argumentList = new ArrayList<String>(20);
 
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java b/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
index 7b60461..0367e06 100644
--- a/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
+++ b/slider-core/src/main/java/org/apache/slider/core/launch/JavaCommandLineBuilder.java
@@ -19,6 +19,7 @@
 package org.apache.slider.core.launch;
 
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.slider.common.tools.SliderUtils;
 
@@ -63,6 +64,8 @@
    * @param value
    */
   public void sysprop(String property, String value) {
+    Preconditions.checkArgument(property != null, "null property name");
+    Preconditions.checkArgument(value != null, "null value");
     add("-D" + property + "=" + value);
   }
   
diff --git a/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java b/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
index c92dfda..df12849 100644
--- a/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
+++ b/slider-core/src/main/java/org/apache/slider/core/main/ServiceLauncher.java
@@ -202,15 +202,11 @@
    * to the configuration, and <code>service</code> to the service.
    *
    * @param conf configuration to use
-   * @throws ClassNotFoundException no such class
-   * @throws InstantiationException no empty constructor,
-   * problems with dependencies
    * @throws ClassNotFoundException classname not on the classpath
    * @throws IllegalAccessException not allowed at the class
    * @throws InstantiationException not allowed to instantiate it
-   * @throws InterruptedException thread interrupted
-   * @throws Throwable any other failure
    */
+  @SuppressWarnings("unchecked")
   public Service instantiateService(Configuration conf)
       throws ClassNotFoundException, InstantiationException, IllegalAccessException,
       ExitUtil.ExitException, NoSuchMethodException, InvocationTargetException {
@@ -423,7 +419,7 @@
     if (argCount <= 1 ) {
       return new String[0];
     }
-    List<String> argsList = new ArrayList<>(argCount);
+    List<String> argsList = new ArrayList<String>(argCount);
     ListIterator<String> arguments = args.listIterator();
     //skip that first entry
     arguments.next();
@@ -447,6 +443,7 @@
         try {
           conf.addResource(file.toURI().toURL());
         } catch (MalformedURLException e) {
+          LOG.debug("File {} cannot be converted to URL: {}", file, e);
           exitWithMessage(EXIT_COMMAND_ARGUMENT_ERROR,
               ARG_CONF + ": configuration file path invalid: " + file);
         }
diff --git a/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java b/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
index 3512168..ab71683 100644
--- a/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
+++ b/slider-core/src/main/java/org/apache/slider/core/persist/JsonSerDeser.java
@@ -105,7 +105,8 @@
    * @throws IOException IO problems
    * @throws JsonMappingException failure to map from the JSON to this class
    */
-  public T fromResource(String resource)
+/* JDK7
+ public T fromResource(String resource)
     throws IOException, JsonParseException, JsonMappingException {
     try(InputStream resStream = this.getClass().getResourceAsStream(resource)) {
       if (resStream == null) {
@@ -116,6 +117,30 @@
       log.error("Exception while parsing json resource {}: {}", resource, e);
       throw e;
     }
+  }*/
+
+  /**
+   * Convert from a JSON file
+   * @param resource input file
+   * @return the parsed JSON
+   * @throws IOException IO problems
+   * @throws JsonMappingException failure to map from the JSON to this class
+   */
+  public synchronized T fromResource(String resource)
+      throws IOException, JsonParseException, JsonMappingException {
+    InputStream resStream = null;
+    try {
+      resStream = this.getClass().getResourceAsStream(resource);
+      if (resStream == null) {
+        throw new FileNotFoundException(resource);
+      }
+      return (T) (mapper.readValue(resStream, classType));
+    } catch (IOException e) {
+      log.error("Exception while parsing json resource {}: {}", resource, e);
+      throw e;
+    } finally {
+      IOUtils.closeStream(resStream);
+    }
   }
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
index f498916..eac34c0 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigSet.java
@@ -41,7 +41,7 @@
       RestPaths.PUBLISHED_CONFIGURATION_REGEXP);
   
   public Map<String, PublishedConfiguration> configurations =
-      new HashMap<>();
+      new HashMap<String, PublishedConfiguration>();
 
   public PublishedConfigSet() {
   }
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
index 93282cc..f76b93b 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfiguration.java
@@ -46,7 +46,7 @@
   
   public String updatedTime;
 
-  public Map<String, String> entries = new HashMap<>();
+  public Map<String, String> entries = new HashMap<String, String>();
 
   public PublishedConfiguration() {
   }
@@ -112,7 +112,7 @@
    * @param entries entries to put
    */
   public void putValues(Iterable<Map.Entry<String, String>> entries) {
-    this.entries = new HashMap<>();
+    this.entries = new HashMap<String, String>();
     for (Map.Entry<String, String> entry : entries) {
       this.entries.put(entry.getKey(), entry.getValue());
     }
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
index 929b8ef..bf812dd 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/PublishedConfigurationOutputter.java
@@ -48,12 +48,24 @@
    * @param dest destination file
    * @throws IOException
    */
+/* JDK7
   public void save(File dest) throws IOException {
     try(FileOutputStream out = new FileOutputStream(dest)) {
       save(out);
       out.close();
     }
   }
+*/
+  public void save(File dest) throws IOException {
+    FileOutputStream out = null;
+    try {
+      out = new FileOutputStream(dest);
+      save(out);
+      out.close();
+    } finally {
+      org.apache.hadoop.io.IOUtils.closeStream(out);
+    }
+  }
 
   /**
    * Save the content. The default saves the asString() value
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
index 120966f..a76e28d 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/docstore/UriMap.java
@@ -29,7 +29,7 @@
 @JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
 public class UriMap {
 
-  public Map<String, String> uris = new HashMap<>();
+  public Map<String, String> uris = new HashMap<String, String>();
   
   @JsonIgnore
   public void put(String key, String value) {
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java b/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
index 07e4981..bdf70a2 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/info/RegistryView.java
@@ -31,7 +31,8 @@
   /**
    * Endpoints
    */
-  public Map<String, RegisteredEndpoint> endpoints = new HashMap<>(2);
+  public Map<String, RegisteredEndpoint> endpoints =
+      new HashMap<String, RegisteredEndpoint>(2);
 
   public String configurationsURL;
   
diff --git a/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java b/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
index 80f0b34..c3c7e63 100644
--- a/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
+++ b/slider-core/src/main/java/org/apache/slider/core/registry/info/ServiceInstanceData.java
@@ -124,13 +124,13 @@
   public Map<String, RegisteredEndpoint> listEndpoints(boolean external) {
     RegistryView view = getRegistryView(external);
     if (view == null) {
-      return new HashMap<>(0);
+      return new HashMap<String, RegisteredEndpoint>(0);
     }
     Map<String, RegisteredEndpoint> endpoints = view.endpoints;
     if (endpoints != null) {
       return endpoints;
     } else {
-      return new HashMap<>(0);
+      return new HashMap<String, RegisteredEndpoint>(0);
     }
   }
   
diff --git a/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java b/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
index 54aeb4f..0d96559 100644
--- a/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
+++ b/slider-core/src/main/java/org/apache/slider/core/zk/ZKIntegration.java
@@ -49,7 +49,7 @@
   public static String SVC_SLIDER = "/" + ZK_SERVICES + "/" + ZK_SLIDER;
   public static String SVC_SLIDER_USERS = SVC_SLIDER + "/" + ZK_USERS;
 
-  public static final List<String> ZK_USERS_PATH_LIST = new ArrayList<>();
+  public static final List<String> ZK_USERS_PATH_LIST = new ArrayList<String>();
   static {
     ZK_USERS_PATH_LIST.add(ZK_SERVICES);
     ZK_USERS_PATH_LIST.add(ZK_SLIDER);
diff --git a/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java b/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
index 8bf25f9..61b1ff0 100644
--- a/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/core/zk/ZookeeperUtils.java
@@ -49,7 +49,7 @@
     if (strings != null) {
       len = strings.length;
     }
-    List<String> tuples = new ArrayList<>(len);
+    List<String> tuples = new ArrayList<String>(len);
     if (strings != null) {
       for (String s : strings) {
         tuples.add(s.trim());
@@ -70,7 +70,7 @@
     if (strings != null) {
       len = strings.length;
     }
-    List<HostAndPort> list = new ArrayList<>(len);
+    List<HostAndPort> list = new ArrayList<HostAndPort>(len);
     if (strings != null) {
       for (String s : strings) {
         list.add(HostAndPort.fromString(s.trim()));
@@ -113,7 +113,7 @@
    * @return
    */
   public static String buildQuorum(List<HostAndPort> hostAndPorts, int defaultPort) {
-    List<String> entries = new ArrayList<>(hostAndPorts.size());
+    List<String> entries = new ArrayList<String>(hostAndPorts.size());
     for (HostAndPort hostAndPort : hostAndPorts) {
       entries.add(buildQuorumEntry(hostAndPort, defaultPort));
     }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
index e35227c..3fbd3cf 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
@@ -20,9 +20,13 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.common.tools.ConfigHelper;
+import org.apache.slider.common.tools.SliderFileSystem;
 import org.apache.slider.common.tools.SliderUtils;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.exceptions.BadCommandArgumentsException;
@@ -30,7 +34,9 @@
 import org.apache.slider.core.main.ExitCodeProvider;
 import org.apache.slider.core.registry.info.RegisteredEndpoint;
 import org.apache.slider.core.registry.info.ServiceInstanceData;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector;
+import org.apache.slider.server.appmaster.state.MostRecentContainerReleaseSelector;
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
 import org.apache.slider.server.services.registry.RegistryViewForProviders;
@@ -42,7 +48,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.Collection;
 import java.util.HashMap;
@@ -67,8 +72,8 @@
   protected AgentRestOperations restOps;
   protected RegistryViewForProviders registry;
   protected ServiceInstanceData registryInstanceData;
-  protected AMViewForProviders amView;
   protected URL amWebAPI;
+  protected QueueAccess queueAccess;
 
   public AbstractProviderService(String name) {
     super(name);
@@ -83,8 +88,8 @@
     return amState;
   }
 
-  public AMViewForProviders getAppMaster() {
-    return amView;
+  public QueueAccess getQueueAccess() {
+    return queueAccess;
   }
 
   public void setAmState(StateAccessForProviders amState) {
@@ -93,10 +98,12 @@
 
   @Override
   public void bind(StateAccessForProviders stateAccessor,
-      RegistryViewForProviders reg, AMViewForProviders amView) {
+      RegistryViewForProviders reg,
+      QueueAccess queueAccess,
+      List<Container> liveContainers) {
     this.amState = stateAccessor;
     this.registry = reg;
-    this.amView = amView;
+    this.queueAccess = queueAccess;
   }
 
   @Override
@@ -104,6 +111,10 @@
     return restOps;
   }
 
+  @Override
+  public void notifyContainerCompleted(ContainerId containerId) {
+  }
+
   public void setAgentRestOperations(AgentRestOperations agentRestOperations) {
     this.restOps = agentRestOperations;
   }
@@ -136,6 +147,15 @@
 
   /**
    * No-op implementation of this method.
+   */
+  @Override
+  public void initializeApplicationConfiguration(
+      AggregateConf instanceDefinition, SliderFileSystem fileSystem)
+      throws IOException, SliderException {
+  }
+
+  /**
+   * No-op implementation of this method.
    *
    * {@inheritDoc}
    */
@@ -271,7 +291,7 @@
    */
   @Override
   public Map<String, String> buildProviderStatus() {
-    return new HashMap<>();
+    return new HashMap<String, String>();
   }
 
   /*
@@ -280,7 +300,7 @@
    */
   @Override
   public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
-    Map<String, String> details = new LinkedHashMap<>();
+    Map<String, String> details = new LinkedHashMap<String, String>();
 
     // add in all the 
     buildEndpointDetails(details);
@@ -313,11 +333,39 @@
   }
   @Override
   public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
-                                              URL secureWebAPI,
-                                              ServiceInstanceData registryInstanceData) throws MalformedURLException,
-      IOException {
+      URL secureWebAPI,
+      ServiceInstanceData registryInstanceData) throws IOException {
 
       this.amWebAPI = unsecureWebAPI;
     this.registryInstanceData = registryInstanceData;
   }
+
+  /**
+   * {@inheritDoc}
+   * 
+   * 
+   * @return The base implementation returns the most recent containers first.
+   */
+  @Override
+  public ContainerReleaseSelector createContainerReleaseSelector() {
+    return new MostRecentContainerReleaseSelector();
+  }
+
+  @Override
+  public void releaseAssignedContainer(ContainerId containerId) {
+    // no-op
+  }
+
+  @Override
+  public void addContainerRequest(AMRMClient.ContainerRequest req) {
+    // no-op
+  }
+
+  /**
+   * No-op implementation of this method.
+   */
+  @Override
+  public void rebuildContainerDetails(List<Container> liveContainers,
+      String applicationId, Map<Integer, ProviderRole> providerRoles) {
+  }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
index 56e24e9..0f5b4fb 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/ProviderService.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.common.tools.SliderFileSystem;
 import org.apache.slider.core.conf.AggregateConf;
@@ -31,19 +32,23 @@
 import org.apache.slider.core.launch.ContainerLauncher;
 import org.apache.slider.core.main.ExitCodeProvider;
 import org.apache.slider.core.registry.info.ServiceInstanceData;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector;
+import org.apache.slider.server.appmaster.operations.RMOperationHandlerActions;
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
 import org.apache.slider.server.services.registry.RegistryViewForProviders;
 
 import java.io.File;
 import java.io.IOException;
-import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.List;
 import java.util.Map;
 
-public interface ProviderService extends ProviderCore, Service,
-                                         ExitCodeProvider {
+public interface ProviderService extends ProviderCore,
+    Service,
+    RMOperationHandlerActions,
+    ExitCodeProvider {
 
   /**
    * Set up the entire container launch context
@@ -69,6 +74,12 @@
       SliderException;
 
   /**
+   * Notify the providers of container completion
+   * @param containerId container that has completed
+   */
+  void notifyContainerCompleted(ContainerId containerId);
+
+  /**
    * Execute a process in the AM
    * @param instanceDefinition cluster description
    * @param confDir configuration directory
@@ -104,6 +115,17 @@
     throws BadCommandArgumentsException, IOException;
 
   /**
+   * The application configuration should be initialized here
+   * 
+   * @param instanceDefinition
+   * @param fileSystem
+   * @throws IOException
+   * @throws SliderException
+   */
+  void initializeApplicationConfiguration(AggregateConf instanceDefinition,
+      SliderFileSystem fileSystem) throws IOException, SliderException;
+
+  /**
    * This is a validation of the application configuration on the AM.
    * Here is where things like the existence of keytabs and other
    * not-seen-client-side properties can be tested, before
@@ -135,15 +157,10 @@
    */
   Map<String, String> buildMonitorDetails(ClusterDescription clusterSpec);
 
-  /**
-   * bind operation -invoked before the service is started
-   * @param stateAccessor interface offering read access to the state
-   * @param registry
-   * @param amView
-   */
-  void bind(StateAccessForProviders stateAccessor,
-            RegistryViewForProviders registry,
-            AMViewForProviders amView);
+  public void bind(StateAccessForProviders stateAccessor,
+      RegistryViewForProviders reg,
+      QueueAccess queueAccess,
+      List<Container> liveContainers);
 
   /**
    * Returns the agent rest operations interface.
@@ -165,6 +182,24 @@
    */
   void applyInitialRegistryDefinitions(URL unsecureWebAPI,
                                        URL secureWebAPI,
-                                       ServiceInstanceData registryInstanceData) throws MalformedURLException,
-      IOException;
+                                       ServiceInstanceData registryInstanceData)
+      throws IOException;
+
+  /**
+   * Create the container release selector for this provider...any policy
+   * can be implemented
+   * @return the selector to use for choosing containers.
+   */
+  ContainerReleaseSelector createContainerReleaseSelector();
+
+  /**
+   * On AM restart (for whatever reason) this API is required to rebuild the AM
+   * internal state with the containers which were already assigned and running
+   * 
+   * @param liveContainers
+   * @param applicationId
+   * @param providerRoles
+   */
+  void rebuildContainerDetails(List<Container> liveContainers,
+      String applicationId, Map<Integer, ProviderRole> providerRoles);
 }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java b/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
index cb7d27a..4b8724a 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/ProviderUtils.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.slider.api.ClusterDescription;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.RoleKeys;
@@ -282,9 +283,9 @@
     MapOperations globalOptions =
       instanceDefinition.getInternalOperations().getGlobalOptions();
     String applicationHome =
-      globalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME);
+      globalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME);
     String imagePath =
-      globalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+      globalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
     return buildPathToHomeDir(imagePath, applicationHome, bindir, script);
   }
 
@@ -350,8 +351,8 @@
                                 String script) throws FileNotFoundException {
     
     String homedir = buildPathToHomeDir(
-      internal.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH),
-      internal.get(OptionKeys.INTERNAL_APPLICATION_HOME),
+      internal.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH),
+      internal.get(InternalKeys.INTERNAL_APPLICATION_HOME),
       bindir,
       script);
     return buildScriptPath(bindir, script, homedir);
@@ -418,7 +419,7 @@
     }
 
     log.debug("Found {} entries in {}", ls.length, base);
-    List<File> directories = new LinkedList<>();
+    List<File> directories = new LinkedList<File>();
     StringBuilder dirs = new StringBuilder();
     for (File file : ls) {
       log.debug("{}", false);
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
index 3835df6..3a1ee76 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java
@@ -21,7 +21,7 @@
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.common.tools.SliderFileSystem;
@@ -103,7 +103,7 @@
         getGlobalOptions().get(AgentKeys.PACKAGE_PATH);
     if (SliderUtils.isUnset(appHome)) {
       String agentImage = instanceDefinition.getInternalOperations().
-          get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+          get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
       sliderFileSystem.verifyFileExists(new Path(agentImage));
     }
   }
@@ -168,12 +168,12 @@
     String appHome = instanceDefinition.getAppConfOperations().
         getGlobalOptions().get(AgentKeys.PACKAGE_PATH);
     String agentImage = instanceDefinition.getInternalOperations().
-        get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+        get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
 
     if (SliderUtils.isUnset(appHome) && SliderUtils.isUnset(agentImage)) {
       throw new BadConfigException("Either agent package path " +
                                    AgentKeys.PACKAGE_PATH + " or image root " +
-                                   OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH
+                                   InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH
                                    + " must be provided.");
     }
   }
@@ -210,7 +210,7 @@
     }
 
     Application application = metainfo.getApplication();
-    tags = new HashSet<>();
+    tags = new HashSet<String>();
     tags.add("Name: " + application.getName());
     tags.add("Version: " + application.getVersion());
     tags.add("Description: " + SliderUtils.truncate(application.getComment(), 80));
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
index 31d09c4..419fa1a 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
@@ -68,6 +68,8 @@
   String ARG_HOST = "--host";
   String ARG_PORT = "--port";
   String ARG_SECURED_PORT = "--secured_port";
+  String ARG_ZOOKEEPER_QUORUM = "--zk-quorum";
+  String ARG_ZOOKEEPER_REGISTRY_PATH = "--zk-reg-path";
   String ARG_DEBUG = "--debug";
   String AGENT_MAIN_SCRIPT_ROOT = "./infra/agent/slider-agent/";
   String AGENT_MAIN_SCRIPT = "agent/main.py";
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
index b839e58..c8b0e1d 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentLaunchParameter.java
@@ -94,7 +94,7 @@
             }
 
             if (trackers == null) {
-              trackers = new HashMap<>(10);
+              trackers = new HashMap<String, CommandTracker>(10);
             }
             String componentName = parameters[0];
             CommandTracker tracker = new CommandTracker(Arrays.copyOfRange(parameters, 1, parameters.length));
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
index c1719b7..c7a82d3 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
@@ -19,16 +19,19 @@
 package org.apache.slider.providers.agent;
 
 import com.google.common.annotations.VisibleForTesting;
+import org.apache.curator.utils.ZKPaths;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.ClusterDescriptionKeys;
 import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.StatusKeys;
 import org.apache.slider.common.SliderKeys;
@@ -38,6 +41,8 @@
 import org.apache.slider.core.conf.ConfTreeOperations;
 import org.apache.slider.core.conf.MapOperations;
 import org.apache.slider.core.exceptions.BadCommandArgumentsException;
+import org.apache.slider.core.exceptions.BadConfigException;
+import org.apache.slider.core.exceptions.NoSuchNodeException;
 import org.apache.slider.core.exceptions.SliderException;
 import org.apache.slider.core.launch.CommandLineBuilder;
 import org.apache.slider.core.launch.ContainerLauncher;
@@ -52,9 +57,16 @@
 import org.apache.slider.providers.ProviderUtils;
 import org.apache.slider.providers.agent.application.metadata.Application;
 import org.apache.slider.providers.agent.application.metadata.Component;
+import org.apache.slider.providers.agent.application.metadata.ComponentExport;
 import org.apache.slider.providers.agent.application.metadata.Export;
 import org.apache.slider.providers.agent.application.metadata.ExportGroup;
 import org.apache.slider.providers.agent.application.metadata.Metainfo;
+import org.apache.slider.providers.agent.application.metadata.OSPackage;
+import org.apache.slider.providers.agent.application.metadata.OSSpecific;
+import org.apache.slider.server.appmaster.actions.ProviderReportedContainerLoss;
+import org.apache.slider.server.appmaster.actions.RegisterComponentInstance;
+import org.apache.slider.server.appmaster.state.ContainerPriority;
+import org.apache.slider.server.appmaster.state.RoleInstance;
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.appmaster.web.rest.agent.AgentCommandType;
 import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations;
@@ -85,8 +97,10 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.slider.server.appmaster.web.rest.RestPaths.SLIDER_PATH_AGENTS;
@@ -107,25 +121,37 @@
   private static final String CONTAINER_ID = "container_id";
   private static final String GLOBAL_CONFIG_TAG = "global";
   private static final String LOG_FOLDERS_TAG = "LogFolders";
+  private static final String COMPONENT_DATA_TAG = "ComponentInstanceData";
+  private static final String SHARED_PORT_TAG = "SHARED";
+  private static final String DO_NOT_PROPAGATE_TAG = "{DO_NOT_PROPAGATE}";
   private static final int MAX_LOG_ENTRIES = 20;
   private static final int DEFAULT_HEARTBEAT_MONITOR_INTERVAL = 60 * 1000;
+
   private final Object syncLock = new Object();
-  private final Map<String, String> allocatedPorts = new ConcurrentHashMap<>();
   private int heartbeatMonitorInterval = 0;
   private AgentClientProvider clientProvider;
-  private Map<String, ComponentInstanceState> componentStatuses = new ConcurrentHashMap<>();
   private AtomicInteger taskId = new AtomicInteger(0);
   private volatile Metainfo metainfo = null;
   private ComponentCommandOrder commandOrder = null;
   private HeartbeatMonitor monitor;
-  private Map<String, String> workFolders =
+  private Boolean canAnyMasterPublish = null;
+  private AgentLaunchParameter agentLaunchParameter = null;
+  private String clusterName = null;
+
+  private final Map<String, ComponentInstanceState> componentStatuses =
+      new ConcurrentHashMap<String, ComponentInstanceState>();
+  private final Map<String, Map<String, String>> componentInstanceData =
+      new ConcurrentHashMap<String, Map<String, String>>();
+  private final Map<String, Map<String, String>> exportGroups =
+      new ConcurrentHashMap<String, Map<String, String>>();
+  private final Map<String, Map<String, String>> allocatedPorts =
+      new ConcurrentHashMap<String, Map<String, String>>();
+  private final Map<String, String> workFolders =
       Collections.synchronizedMap(new LinkedHashMap<String, String>(MAX_LOG_ENTRIES, 0.75f, false) {
         protected boolean removeEldestEntry(Map.Entry eldest) {
           return size() > MAX_LOG_ENTRIES;
         }
       });
-  private Boolean canAnyMasterPublish = null;
-  private AgentLaunchParameter agentLaunchParameter = null;
 
   /**
    * Create an instance of AgentProviderService
@@ -161,6 +187,40 @@
     clientProvider.validateInstanceDefinition(instanceDefinition);
   }
 
+  // Reads the metainfo.xml in the application package and loads it
+  private void buildMetainfo(AggregateConf instanceDefinition,
+      SliderFileSystem fileSystem) throws IOException, SliderException {
+    String appDef = instanceDefinition.getAppConfOperations()
+        .getGlobalOptions().getMandatoryOption(AgentKeys.APP_DEF);
+
+    if (metainfo == null) {
+      synchronized (syncLock) {
+        if (metainfo == null) {
+          readAndSetHeartbeatMonitoringInterval(instanceDefinition);
+          initializeAgentDebugCommands(instanceDefinition);
+
+          metainfo = getApplicationMetainfo(fileSystem, appDef);
+          if (metainfo == null || metainfo.getApplication() == null) {
+            log.error("metainfo.xml is unavailable or malformed at {}.", appDef);
+            throw new SliderException(
+                "metainfo.xml is required in app package.");
+          }
+          commandOrder = new ComponentCommandOrder(metainfo.getApplication()
+              .getCommandOrder());
+          monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval());
+          monitor.start();
+        }
+      }
+    }
+  }
+
+  @Override
+  public void initializeApplicationConfiguration(
+      AggregateConf instanceDefinition, SliderFileSystem fileSystem)
+      throws IOException, SliderException {
+    buildMetainfo(instanceDefinition, fileSystem);
+  }
+
   @Override
   public void buildContainerLaunchContext(ContainerLauncher launcher,
                                           AggregateConf instanceDefinition,
@@ -177,24 +237,7 @@
     String appDef = instanceDefinition.getAppConfOperations().
         getGlobalOptions().getMandatoryOption(AgentKeys.APP_DEF);
 
-    if (metainfo == null) {
-      synchronized (syncLock) {
-        if (metainfo == null) {
-          readAndSetHeartbeatMonitoringInterval(instanceDefinition);
-          initializeAgentDebugCommands(instanceDefinition);
-
-          metainfo = getApplicationMetainfo(fileSystem, appDef);
-          if (metainfo == null || metainfo.getApplication() == null) {
-            log.error("metainfo.xml is unavailable or malformed at {}.", appDef);
-            throw new SliderException("metainfo.xml is required in app package.");
-          }
-
-          commandOrder = new ComponentCommandOrder(metainfo.getApplication().getCommandOrder());
-          monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval());
-          monitor.start();
-        }
-      }
-    }
+    initializeApplicationConfiguration(instanceDefinition, fileSystem);
 
     log.info("Build launch context for Agent");
     log.debug(instanceDefinition.toString());
@@ -208,7 +251,9 @@
     String logDir = ApplicationConstants.Environment.LOG_DIRS.$();
     launcher.setEnv("AGENT_LOG_ROOT", logDir);
     log.info("AGENT_LOG_ROOT set to {}", logDir);
-    launcher.setEnv(HADOOP_USER_NAME, System.getenv(HADOOP_USER_NAME));
+    if (System.getenv(HADOOP_USER_NAME) != null) {
+      launcher.setEnv(HADOOP_USER_NAME, System.getenv(HADOOP_USER_NAME));
+    }
     // for 2-Way SSL
     launcher.setEnv(SLIDER_PASSPHRASE, SliderKeys.PASSPHRASE);
 
@@ -222,8 +267,15 @@
       scriptPath = new File(appHome, AgentKeys.AGENT_MAIN_SCRIPT).getPath();
     }
 
+    // set PYTHONPATH
+    List<String> pythonPaths = new ArrayList<String>();
+    pythonPaths.add(AgentKeys.AGENT_MAIN_SCRIPT_ROOT);
+    String pythonPath = StringUtils.join(File.pathSeparator, pythonPaths);
+    launcher.setEnv(PYTHONPATH, pythonPath);
+    log.info("PYTHONPATH set to {}", pythonPath);
+
     String agentImage = instanceDefinition.getInternalOperations().
-        get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+        get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
     if (agentImage != null) {
       LocalResource agentImageRes = fileSystem.createAmResource(new Path(agentImage), LocalResourceType.ARCHIVE);
       launcher.addLocalResource(AgentKeys.AGENT_INSTALL_DIR, agentImageRes);
@@ -260,12 +312,10 @@
 
     operation.add(scriptPath);
     operation.add(ARG_LABEL, label);
-    operation.add(ARG_HOST);
-    operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_HOSTNAME));
-    operation.add(ARG_PORT);
-    operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_AGENT_PORT));
-    operation.add(ARG_SECURED_PORT);
-    operation.add(getClusterInfoPropertyValue(StatusKeys.INFO_AM_SECURED_AGENT_PORT));
+    operation.add(ARG_ZOOKEEPER_QUORUM);
+    operation.add(getClusterOptionPropertyValue(OptionKeys.ZOOKEEPER_QUORUM));
+    operation.add(ARG_ZOOKEEPER_REGISTRY_PATH);
+    operation.add(getZkRegistryPath());
 
     String debugCmd = agentLaunchParameter.getNextLaunchParameter(role);
     if (debugCmd != null && debugCmd.length() != 0) {
@@ -276,11 +326,303 @@
     launcher.addCommand(operation.build());
 
     // initialize the component instance state
-    componentStatuses.put(label,
-                          new ComponentInstanceState(
-                              role,
-                              container.getId().toString(),
-                              getClusterInfoPropertyValue(OptionKeys.APPLICATION_NAME)));
+    getComponentStatuses().put(label,
+                               new ComponentInstanceState(
+                                   role,
+                                   container.getId(),
+                                   getClusterInfoPropertyValue(OptionKeys.APPLICATION_NAME)));
+  }
+
+  // build the zookeeper registry path
+  private String getZkRegistryPath() {
+    String zkRegistryRoot = getConfig().get(REGISTRY_PATH,
+        DEFAULT_REGISTRY_PATH);
+    String appType = APP_TYPE;
+    String zkRegistryPath = ZKPaths.makePath(zkRegistryRoot, appType);
+    String clusterName = getAmState().getInternalsSnapshot().get(
+        OptionKeys.APPLICATION_NAME);
+    zkRegistryPath = ZKPaths.makePath(zkRegistryPath, clusterName);
+    return zkRegistryPath;
+  }
+
+  @Override
+  public void rebuildContainerDetails(List<Container> liveContainers,
+      String applicationId, Map<Integer, ProviderRole> providerRoleMap) {
+    for (Container container : liveContainers) {
+      // get the role name and label
+      ProviderRole role = providerRoleMap.get(ContainerPriority
+          .extractRole(container));
+      if (role != null) {
+        String roleName = role.name;
+        String label = getContainerLabel(container, roleName);
+        log.info("Rebuilding in-memory: container {} in role {} in cluster {}",
+            container.getId(), roleName, applicationId);
+        getComponentStatuses().put(
+            label,
+            new ComponentInstanceState(roleName, container.getId(),
+                applicationId));
+      } else {
+        log.warn("Role not found for container {} in cluster {}",
+            container.getId(), applicationId);
+      }
+    }
+  }
+
+  /**
+   * Run this service
+   *
+   * @param instanceDefinition component description
+   * @param confDir            local dir with the config
+   * @param env                environment variables above those generated by
+   * @param execInProgress     callback for the event notification
+   *
+   * @throws IOException     IO problems
+   * @throws SliderException anything internal
+   */
+  @Override
+  public boolean exec(AggregateConf instanceDefinition,
+                      File confDir,
+                      Map<String, String> env,
+                      ProviderCompleted execInProgress) throws
+      IOException,
+      SliderException {
+
+    return false;
+  }
+
+  @Override
+  public boolean isSupportedRole(String role) {
+    return true;
+  }
+
+  /**
+   * Handle registration calls from the agents
+   * @param registration
+   * @return
+   */
+  @Override
+  public RegistrationResponse handleRegistration(Register registration) {
+    log.info("Handling registration: " + registration);
+    RegistrationResponse response = new RegistrationResponse();
+    String label = registration.getHostname();
+    State agentState = registration.getActualState();
+    if (getComponentStatuses().containsKey(label)) {
+      response.setResponseStatus(RegistrationStatus.OK);
+      ComponentInstanceState componentStatus = getComponentStatuses().get(label);
+      componentStatus.heartbeat(System.currentTimeMillis());
+      updateComponentStatusWithAgentState(componentStatus, agentState);
+
+      Map<String, String> ports = registration.getAllocatedPorts();
+      if (ports != null && !ports.isEmpty()) {
+        String roleName = getRoleName(label);
+        String containerId = getContainerId(label);
+        processAllocatedPorts(registration.getPublicHostname(), roleName, containerId, ports);
+      }
+    } else {
+      response.setResponseStatus(RegistrationStatus.FAILED);
+      response.setLog("Label not recognized.");
+      log.warn("Received registration request from unknown label {}", label);
+    }
+    log.info("Registration response: " + response);
+    return response;
+  }
+
+  /**
+   * Handle heartbeat response from agents
+   * @param heartBeat
+   * @return
+   */
+  @Override
+  public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
+    log.debug("Handling heartbeat: " + heartBeat);
+    HeartBeatResponse response = new HeartBeatResponse();
+    long id = heartBeat.getResponseId();
+    response.setResponseId(id + 1L);
+
+    String label = heartBeat.getHostname();
+    String roleName = getRoleName(label);
+    String containerId = getContainerId(label);
+
+    StateAccessForProviders accessor = getAmState();
+    String scriptPath = getScriptPathFromMetainfo(roleName);
+
+    if (scriptPath == null) {
+      log.error("role.script is unavailable for " + roleName + ". Commands will not be sent.");
+      return response;
+    }
+
+    if (!getComponentStatuses().containsKey(label)) {
+      return response;
+    }
+
+    Boolean isMaster = isMaster(roleName);
+    ComponentInstanceState componentStatus = getComponentStatuses().get(label);
+    componentStatus.heartbeat(System.currentTimeMillis());
+
+    publishConfigAndExportGroups(heartBeat, componentStatus, roleName);
+
+    List<CommandReport> reports = heartBeat.getReports();
+    if (reports != null && !reports.isEmpty()) {
+      CommandReport report = reports.get(0);
+      Map<String, String> ports = report.getAllocatedPorts();
+      if (ports != null && !ports.isEmpty()) {
+        processAllocatedPorts(heartBeat.getFqdn(), roleName, containerId, ports);
+      }
+      CommandResult result = CommandResult.getCommandResult(report.getStatus());
+      Command command = Command.getCommand(report.getRoleCommand());
+      componentStatus.applyCommandResult(result, command);
+      log.info("Component operation. Status: {}", result);
+
+      if (command == Command.INSTALL && report.getFolders() != null && report.getFolders().size() > 0) {
+        publishLogFolderPaths(report.getFolders(), containerId, heartBeat.getFqdn());
+      }
+    }
+
+    int waitForCount = accessor.getInstanceDefinitionSnapshot().
+        getAppConfOperations().getComponentOptInt(roleName, AgentKeys.WAIT_HEARTBEAT, 0);
+
+    if (id < waitForCount) {
+      log.info("Waiting until heartbeat count {}. Current val: {}", waitForCount, id);
+      getComponentStatuses().put(roleName, componentStatus);
+      return response;
+    }
+
+    Command command = componentStatus.getNextCommand();
+    try {
+      if (Command.NOP != command) {
+        if (command == Command.INSTALL) {
+          log.info("Installing {} on {}.", roleName, containerId);
+          addInstallCommand(roleName, containerId, response, scriptPath);
+          componentStatus.commandIssued(command);
+        } else if (command == Command.START) {
+          // check against dependencies
+          boolean canExecute = commandOrder.canExecute(roleName, command, getComponentStatuses().values());
+          if (canExecute) {
+            log.info("Starting {} on {}.", roleName, containerId);
+            addStartCommand(roleName, containerId, response, scriptPath, isMarkedAutoRestart(roleName));
+            componentStatus.commandIssued(command);
+          } else {
+            log.info("Start of {} on {} delayed as dependencies have not started.", roleName, containerId);
+          }
+        }
+      }
+
+      // if there is no outstanding command then retrieve config
+      if (isMaster && componentStatus.getState() == State.STARTED
+          && command == Command.NOP) {
+        if (!componentStatus.getConfigReported()) {
+          log.info("Requesting applied config for {} on {}.", roleName, containerId);
+          addGetConfigCommand(roleName, containerId, response);
+        }
+      }
+
+      // if restart is required then signal
+      response.setRestartEnabled(false);
+      if (componentStatus.getState() == State.STARTED
+          && command == Command.NOP && isMarkedAutoRestart(roleName)) {
+        response.setRestartEnabled(true);
+      }
+    } catch (SliderException e) {
+      componentStatus.applyCommandResult(CommandResult.FAILED, command);
+      log.warn("Component instance failed operation.", e);
+    }
+
+    log.debug("Heartbeat response: " + response);
+    return response;
+  }
+
+  protected void processAllocatedPorts(String fqdn,
+                                     String roleName,
+                                     String containerId,
+                                     Map<String, String> ports) {
+    RoleInstance instance;
+    try {
+      instance = getAmState().getOwnedContainer(containerId);
+    } catch (NoSuchNodeException e) {
+      log.warn("Failed to locate instance of container {}: {}", containerId, e);
+      instance = null;
+    }
+    for (Map.Entry<String, String> port : ports.entrySet()) {
+      String portname = port.getKey();
+      String portNo = port.getValue();
+      log.info("Recording allocated port for {} as {}", portname, portNo);
+      this.getAllocatedPorts().put(portname, portNo);
+      this.getAllocatedPorts(containerId).put(portname, portNo);
+        if (instance!=null) {
+          try {
+            instance.registerPortEndpoint(Integer.valueOf(portNo), portname, "");
+          } catch (NumberFormatException e) {
+            log.warn("Failed to parse {}: {}", portNo, e);
+          }
+        }
+    }
+
+    // component specific publishes
+    processAndPublishComponentSpecificData(ports, containerId, fqdn, roleName);
+    
+    // and update registration entries
+    if (instance != null) {
+      queueAccess.put(new RegisterComponentInstance(instance.getId(), 0,
+          TimeUnit.MILLISECONDS));
+    }
+  }
+
+  private void updateComponentStatusWithAgentState(
+      ComponentInstanceState componentStatus, State agentState) {
+    if (agentState != null) {
+      componentStatus.setState(agentState);
+    }
+  }
+
+  @Override
+  public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
+    Map<String, String> details = super.buildMonitorDetails(clusterDesc);
+    buildRoleHostDetails(details);
+    return details;
+  }
+
+  @Override
+  public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
+                                              URL secureWebAPI,
+                                              ServiceInstanceData instanceData) throws IOException {
+    super.applyInitialRegistryDefinitions(unsecureWebAPI,
+                                          secureWebAPI,
+                                          instanceData
+    );
+
+    try {
+      instanceData.internalView.endpoints.put(
+          CustomRegistryConstants.AGENT_REST_API,
+          new RegisteredEndpoint(
+              new URL(secureWebAPI, SLIDER_PATH_AGENTS),
+              "Agent REST API"));
+    } catch (URISyntaxException e) {
+      throw new IOException(e);
+    }
+  }
+
+  @Override
+  public void notifyContainerCompleted(ContainerId containerId) {
+    if (containerId != null) {
+      String containerIdStr = containerId.toString();
+      if (getComponentInstanceData().containsKey(containerIdStr)) {
+        getComponentInstanceData().remove(containerIdStr);
+        log.info("Removing container specific data for {}", containerIdStr);
+        publishComponentInstanceData();
+      }
+
+      if (this.allocatedPorts.containsKey(containerIdStr)) {
+        this.allocatedPorts.remove(containerIdStr);
+      }
+
+      synchronized (this.componentStatuses) {
+        for (String label : getComponentStatuses().keySet()) {
+          if (label.startsWith(containerIdStr)) {
+            getComponentStatuses().remove(label);
+          }
+        }
+      }
+    }
   }
 
   /**
@@ -337,14 +679,21 @@
     return this.heartbeatMonitorInterval;
   }
 
+  private String getClusterName() {
+    if (clusterName == null || clusterName.length() == 0) {
+      clusterName = getAmState().getInternalsSnapshot().get(OptionKeys.APPLICATION_NAME);
+    }
+    return clusterName;
+  }
+
   /**
-   * Publish a named config bag that may contain name-value pairs for app configurations such as hbase-site
+   * Publish a named property bag that may contain name-value pairs for app configurations such as hbase-site
    * @param name
    * @param description
    * @param entries
    */
-  protected void publishComponentConfiguration(String name, String description,
-                                               Iterable<Map.Entry<String, String>> entries) {
+  protected void publishApplicationInstanceData(String name, String description,
+                                                Iterable<Map.Entry<String, String>> entries) {
     PublishedConfiguration pubconf = new PublishedConfiguration();
     pubconf.description = description;
     pubconf.putValues(entries);
@@ -374,45 +723,26 @@
     return description.getInfo(name);
   }
 
-  /**
-   * Lost heartbeat from the container - release it and ask for a replacement
-   *
-   * @param label
-   *
-   * @return if release is requested successfully
-   */
-  protected boolean releaseContainer(String label) {
-    componentStatuses.remove(label);
-    try {
-      getAppMaster().refreshContainer(getContainerId(label), true);
-    } catch (SliderException e) {
-      log.info("Error while requesting container release for {}. Message: {}", label, e.getMessage());
-      return false;
-    }
-
-    return true;
+  protected String getClusterOptionPropertyValue(String name)
+      throws BadConfigException {
+    StateAccessForProviders accessor = getAmState();
+    assert accessor.isApplicationLive();
+    ClusterDescription description = accessor.getClusterStatus();
+    return description.getMandatoryOption(name);
   }
 
   /**
-   * Run this service
+   * Lost heartbeat from the container - release it and ask for a replacement
+   * (async operation)
+   *  @param label
+   * @param containerId
    *
-   * @param instanceDefinition component description
-   * @param confDir            local dir with the config
-   * @param env                environment variables above those generated by
-   * @param execInProgress     callback for the event notification
-   *
-   * @throws IOException     IO problems
-   * @throws SliderException anything internal
    */
-  @Override
-  public boolean exec(AggregateConf instanceDefinition,
-                      File confDir,
-                      Map<String, String> env,
-                      ProviderCompleted execInProgress) throws
-      IOException,
-      SliderException {
-
-    return false;
+  protected void lostContainer(
+      String label,
+      ContainerId containerId) {
+    getComponentStatuses().remove(label);
+    getQueueAccess().put(new ProviderReportedContainerLoss(containerId));
   }
 
   /**
@@ -421,174 +751,88 @@
    * @return the provider status - map of entries to add to the info section
    */
   public Map<String, String> buildProviderStatus() {
-    Map<String, String> stats = new HashMap<>();
+    Map<String, String> stats = new HashMap<String, String>();
     return stats;
   }
 
-  @Override
-  public boolean isSupportedRole(String role) {
-    return true;
-  }
 
   /**
-   * Handle registration calls from the agents
-   * @param registration
-   * @return
-   */
-  @Override
-  public RegistrationResponse handleRegistration(Register registration) {
-    RegistrationResponse response = new RegistrationResponse();
-    String label = registration.getHostname();
-    if (componentStatuses.containsKey(label)) {
-      response.setResponseStatus(RegistrationStatus.OK);
-      componentStatuses.get(label).setLastHeartbeat(System.currentTimeMillis());
-    } else {
-      response.setResponseStatus(RegistrationStatus.FAILED);
-      response.setLog("Label not recognized.");
-    }
-    return response;
-  }
-
-  /**
-   * Handle heartbeat response from agents
-   * @param heartBeat
-   * @return
-   */
-  @Override
-  public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
-    HeartBeatResponse response = new HeartBeatResponse();
-    long id = heartBeat.getResponseId();
-    response.setResponseId(id + 1L);
-
-    String label = heartBeat.getHostname();
-    String roleName = getRoleName(label);
-
-    String containerId = getContainerId(label);
-    StateAccessForProviders accessor = getAmState();
-    String scriptPath = getScriptPathFromMetainfo(roleName);
-
-    if (scriptPath == null) {
-      log.error("role.script is unavailable for " + roleName + ". Commands will not be sent.");
-      return response;
-    }
-
-    if (!componentStatuses.containsKey(label)) {
-      return response;
-    }
-
-    Boolean isMaster = isMaster(roleName);
-    ComponentInstanceState componentStatus = componentStatuses.get(label);
-    componentStatus.setLastHeartbeat(System.currentTimeMillis());
-    // If no Master can explicitly publish then publish if its a master
-    // Otherwise, wait till the master that can publish is ready
-    if (isMaster &&
-        (canAnyMasterPublishConfig() == false || canPublishConfig(roleName))) {
-      processReturnedStatus(heartBeat, componentStatus);
-    }
-
-    List<CommandReport> reports = heartBeat.getReports();
-    if (reports != null && !reports.isEmpty()) {
-      CommandReport report = reports.get(0);
-      Map<String, String> ports = report.getAllocatedPorts();
-      if (ports != null && !ports.isEmpty()) {
-        for (Map.Entry<String, String> port : ports.entrySet()) {
-          log.info("Recording allocated port for {} as {}", port.getKey(), port.getValue());
-          this.allocatedPorts.put(port.getKey(), port.getValue());
-        }
-      }
-      CommandResult result = CommandResult.getCommandResult(report.getStatus());
-      Command command = Command.getCommand(report.getRoleCommand());
-      componentStatus.applyCommandResult(result, command);
-      log.info("Component operation. Status: {}", result);
-
-      if (command == Command.INSTALL && report.getFolders() != null && report.getFolders().size() > 0) {
-        processFolderPaths(report.getFolders(), containerId, heartBeat.getFqdn());
-      }
-    }
-
-    int waitForCount = accessor.getInstanceDefinitionSnapshot().
-        getAppConfOperations().getComponentOptInt(roleName, AgentKeys.WAIT_HEARTBEAT, 0);
-
-    if (id < waitForCount) {
-      log.info("Waiting until heartbeat count {}. Current val: {}", waitForCount, id);
-      componentStatuses.put(roleName, componentStatus);
-      return response;
-    }
-
-    Command command = componentStatus.getNextCommand();
-    try {
-      if (Command.NOP != command) {
-        if (command == Command.INSTALL) {
-          log.info("Installing {} on {}.", roleName, containerId);
-          addInstallCommand(roleName, containerId, response, scriptPath);
-          componentStatus.commandIssued(command);
-        } else if (command == Command.START) {
-          // check against dependencies
-          boolean canExecute = commandOrder.canExecute(roleName, command, componentStatuses.values());
-          if (canExecute) {
-            log.info("Starting {} on {}.", roleName, containerId);
-            addStartCommand(roleName, containerId, response, scriptPath);
-            componentStatus.commandIssued(command);
-          } else {
-            log.info("Start of {} on {} delayed as dependencies have not started.", roleName, containerId);
-          }
-        }
-      }
-      // if there is no outstanding command then retrieve config
-      if (isMaster && componentStatus.getState() == State.STARTED
-          && command == Command.NOP) {
-        if (!componentStatus.getConfigReported()) {
-          addGetConfigCommand(roleName, containerId, response);
-        }
-      }
-    } catch (SliderException e) {
-      componentStatus.applyCommandResult(CommandResult.FAILED, command);
-      log.warn("Component instance failed operation.", e);
-    }
-
-    return response;
-  }
-
-  /**
-   * Format the folder locations before publishing in the registry service
+   * Format the folder locations and publish in the registry service
    * @param folders
    * @param containerId
    * @param hostFqdn
    */
-  private void processFolderPaths(Map<String, String> folders, String containerId, String hostFqdn) {
+  private void publishLogFolderPaths(Map<String, String> folders, String containerId, String hostFqdn) {
     for (String key : folders.keySet()) {
       workFolders.put(String.format("%s-%s-%s", hostFqdn, containerId, key), folders.get(key));
     }
 
-    publishComponentConfiguration(LOG_FOLDERS_TAG, LOG_FOLDERS_TAG, (new HashMap<>(this.workFolders)).entrySet());
+    publishApplicationInstanceData(LOG_FOLDERS_TAG, LOG_FOLDERS_TAG,
+        (new HashMap<String, String>(this.workFolders)).entrySet());
   }
 
+
   /**
    * Process return status for component instances
+   *
    * @param heartBeat
    * @param componentStatus
    */
-  protected void processReturnedStatus(HeartBeat heartBeat, ComponentInstanceState componentStatus) {
+  protected void publishConfigAndExportGroups(
+      HeartBeat heartBeat, ComponentInstanceState componentStatus, String roleName) {
     List<ComponentStatus> statuses = heartBeat.getComponentStatus();
     if (statuses != null && !statuses.isEmpty()) {
       log.info("Processing {} status reports.", statuses.size());
       for (ComponentStatus status : statuses) {
         log.info("Status report: " + status.toString());
+
         if (status.getConfigs() != null) {
-          for (String key : status.getConfigs().keySet()) {
-            Map<String, String> configs = status.getConfigs().get(key);
-            publishComponentConfiguration(key, key, configs.entrySet());
+          Application application = getMetainfo().getApplication();
+
+          if (canAnyMasterPublishConfig() == false || canPublishConfig(roleName)) {
+            // If no Master can explicitly publish then publish if its a master
+            // Otherwise, wait till the master that can publish is ready
+
+            Set<String> exportedConfigs = new HashSet();
+            String exportedConfigsStr = application.getExportedConfigs();
+            boolean exportedAllConfigs = exportedConfigsStr == null || exportedConfigsStr.isEmpty();
+            if (!exportedAllConfigs) {
+              for (String exportedConfig : exportedConfigsStr.split(",")) {
+                if (exportedConfig.trim().length() > 0) {
+                  exportedConfigs.add(exportedConfig.trim());
+                }
+              }
+            }
+
+            for (String key : status.getConfigs().keySet()) {
+              if ((!exportedAllConfigs && exportedConfigs.contains(key)) ||
+                  exportedAllConfigs) {
+                Map<String, String> configs = status.getConfigs().get(key);
+                publishApplicationInstanceData(key, key, configs.entrySet());
+              }
+            }
           }
 
-          Application application = getMetainfo().getApplication();
           List<ExportGroup> exportGroups = application.getExportGroups();
-          if (exportGroups != null && !exportGroups.isEmpty()) {
+          boolean hasExportGroups = exportGroups != null && !exportGroups.isEmpty();
 
+          Set<String> appExports = new HashSet();
+          String appExportsStr = getApplicationComponent(roleName).getAppExports();
+          boolean hasNoAppExports = appExportsStr == null || appExportsStr.isEmpty();
+          if (!hasNoAppExports) {
+            for (String appExport : appExportsStr.split(",")) {
+              if (appExport.trim().length() > 0) {
+                appExports.add(appExport.trim());
+              }
+            }
+          }
+
+          if (hasExportGroups && appExports.size() > 0) {
             String configKeyFormat = "${site.%s.%s}";
             String hostKeyFormat = "${%s_HOST}";
 
             // publish export groups if any
-            Map<String, String> replaceTokens = new HashMap<>();
+            Map<String, String> replaceTokens = new HashMap<String, String>();
             for (Map.Entry<String, Map<String, ClusterNode>> entry : getRoleClusterNodeMapping().entrySet()) {
               String hostName = getHostsList(entry.getValue().values(), true).iterator().next();
               replaceTokens.put(String.format(hostKeyFormat, entry.getKey().toUpperCase(Locale.ENGLISH)), hostName);
@@ -602,32 +846,146 @@
               }
             }
 
+            Set<String> modifiedGroups = new HashSet<String>();
             for (ExportGroup exportGroup : exportGroups) {
               List<Export> exports = exportGroup.getExports();
               if (exports != null && !exports.isEmpty()) {
                 String exportGroupName = exportGroup.getName();
-                Map<String, String> map = new HashMap<>();
+                Map<String, String> map = getCurrentExports(exportGroupName);
                 for (Export export : exports) {
-                  String value = export.getValue();
-                  // replace host names
-                  for (String token : replaceTokens.keySet()) {
-                    if (value.contains(token)) {
-                      value = value.replace(token, replaceTokens.get(token));
+                  if (canBeExported(exportGroupName, export.getName(), appExports)) {
+                    String value = export.getValue();
+                    // replace host names
+                    for (String token : replaceTokens.keySet()) {
+                      if (value.contains(token)) {
+                        value = value.replace(token, replaceTokens.get(token));
+                      }
                     }
+                    map.put(export.getName(), value);
+                    log.info("Preparing to publish. Key {} and Value {}", export.getName(), value);
                   }
-                  map.put(export.getName(), value);
-                  log.info("Preparing to publish. Key {} and Value {}", export.getName(), value);
                 }
-                publishComponentConfiguration(exportGroupName, exportGroupName, map.entrySet());
+                modifiedGroups.add(exportGroupName);
               }
             }
+            publishModifiedExportGroups(modifiedGroups);
           }
+
+          log.info("Received and processed config for {}", heartBeat.getHostname());
           componentStatus.setConfigReported(true);
+
         }
       }
     }
   }
 
+  private boolean canBeExported(String exportGroupName, String name, Set<String> appExports) {
+    return  appExports.contains(String.format("%s-%s", exportGroupName, name));
+  }
+
+  protected Map<String, String> getCurrentExports(String groupName) {
+    if(!this.exportGroups.containsKey(groupName)) {
+       synchronized (this.exportGroups) {
+         if(!this.exportGroups.containsKey(groupName)) {
+           this.exportGroups.put(groupName, new ConcurrentHashMap<String, String>());
+         }
+       }
+    }
+
+    return this.exportGroups.get(groupName);
+  }
+
+  private void publishModifiedExportGroups(Set<String> modifiedGroups) {
+    synchronized (this.exportGroups) {
+      for(String groupName : modifiedGroups) {
+        publishApplicationInstanceData(groupName, groupName, this.exportGroups.get(groupName).entrySet());
+      }
+    }
+  }
+
+  /** Publish component instance specific data if the component demands it */
+  protected void processAndPublishComponentSpecificData(Map<String, String> ports,
+                                                        String containerId,
+                                                        String hostFqdn,
+                                                        String roleName) {
+    String portVarFormat = "${site.%s}";
+    String hostNamePattern = "${THIS_HOST}";
+    Map<String, String> toPublish = new HashMap<String, String>();
+
+    Application application = getMetainfo().getApplication();
+    for (Component component : application.getComponents()) {
+      if (component.getName().equals(roleName)) {
+        if (component.getComponentExports().size() > 0) {
+
+          for (ComponentExport export : component.getComponentExports()) {
+            String templateToExport = export.getValue();
+            for (String portName : ports.keySet()) {
+              boolean publishData = false;
+              String portValPattern = String.format(portVarFormat, portName);
+              if (templateToExport.contains(portValPattern)) {
+                templateToExport = templateToExport.replace(portValPattern, ports.get(portName));
+                publishData = true;
+              }
+              if (templateToExport.contains(hostNamePattern)) {
+                templateToExport = templateToExport.replace(hostNamePattern, hostFqdn);
+                publishData = true;
+              }
+              if (publishData) {
+                toPublish.put(export.getName(), templateToExport);
+                log.info("Publishing {} for name {} and container {}",
+                         templateToExport, export.getName(), containerId);
+              }
+            }
+          }
+        }
+      }
+    }
+
+    if (toPublish.size() > 0) {
+      Map<String, String> perContainerData = null;
+      if (!getComponentInstanceData().containsKey(containerId)) {
+        perContainerData = new ConcurrentHashMap<String, String>();
+      } else {
+        perContainerData = getComponentInstanceData().get(containerId);
+      }
+      perContainerData.putAll(toPublish);
+      getComponentInstanceData().put(containerId, perContainerData);
+      publishComponentInstanceData();
+    }
+  }
+
+  private void publishComponentInstanceData() {
+    Map<String, String> dataToPublish = new HashMap<String, String>();
+    synchronized (this.componentInstanceData) {
+      for (String container : getComponentInstanceData().keySet()) {
+        for (String prop : getComponentInstanceData().get(container).keySet()) {
+          dataToPublish.put(
+              container + "." + prop, getComponentInstanceData().get(container).get(prop));
+        }
+      }
+    }
+    publishApplicationInstanceData(COMPONENT_DATA_TAG, COMPONENT_DATA_TAG, dataToPublish.entrySet());
+  }
+
+  /**
+   * Return Component based on name
+   * @param roleName
+   * @return
+   */
+  protected Component getApplicationComponent(String roleName) {
+    Application application = getMetainfo().getApplication();
+    if (application == null) {
+      log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
+    } else {
+      for (Component component : application.getComponents()) {
+        if (component.getName().equals(roleName)) {
+          return component;
+        }
+      }
+    }
+    return null;
+  }
+
   /**
    * Extract script path from the application metainfo
    *
@@ -636,19 +994,11 @@
    * @return
    */
   protected String getScriptPathFromMetainfo(String roleName) {
-    String scriptPath = null;
-    Application application = getMetainfo().getApplication();
-    if (application == null) {
-      log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
-      return scriptPath;
+    Component component = getApplicationComponent(roleName);
+    if (component != null) {
+      return component.getCommandScript().getScript();
     }
-    for (Component component : application.getComponents()) {
-      if (component.getName().equals(roleName)) {
-        scriptPath = component.getCommandScript().getScript();
-        break;
-      }
-    }
-    return scriptPath;
+    return null;
   }
 
   /**
@@ -659,18 +1009,10 @@
    * @return
    */
   protected boolean isMaster(String roleName) {
-    Application application = getMetainfo().getApplication();
-    if (application == null) {
-      log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
-    } else {
-      for (Component component : application.getComponents()) {
-        if (component.getName().equals(roleName)) {
-          if (component.getCategory().equals("MASTER")) {
-            return true;
-          } else {
-            return false;
-          }
-        }
+    Component component = getApplicationComponent(roleName);
+    if (component != null) {
+      if (component.getCategory().equals("MASTER")) {
+        return true;
       }
     }
     return false;
@@ -684,15 +1026,24 @@
    * @return
    */
   protected boolean canPublishConfig(String roleName) {
-    Application application = getMetainfo().getApplication();
-    if (application == null) {
-      log.error("Malformed app definition: Expect application as the top level element for metainfo.xml");
-    } else {
-      for (Component component : application.getComponents()) {
-        if (component.getName().equals(roleName)) {
-          return Boolean.TRUE.toString().equals(component.getPublishConfig());
-        }
-      }
+    Component component = getApplicationComponent(roleName);
+    if (component != null) {
+      return Boolean.TRUE.toString().equals(component.getPublishConfig());
+    }
+    return false;
+  }
+
+  /**
+   * Checks if the role is marked auto-restart
+   *
+   * @param roleName
+   *
+   * @return
+   */
+  protected boolean isMarkedAutoRestart(String roleName) {
+    Component component = getApplicationComponent(roleName);
+    if (component != null) {
+      return component.getRequiresAutoRestart();
     }
     return false;
   }
@@ -743,26 +1094,22 @@
       throws SliderException {
     assert getAmState().isApplicationLive();
     ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
-    ConfTreeOperations resourcesConf = getAmState().getResourcesSnapshot();
-    ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
 
     ExecutionCommand cmd = new ExecutionCommand(AgentCommandType.EXECUTION_COMMAND);
     prepareExecutionCommand(cmd);
-    String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+    String clusterName = getClusterName();
     cmd.setClusterName(clusterName);
     cmd.setRoleCommand(Command.INSTALL.toString());
     cmd.setServiceName(clusterName);
     cmd.setComponentName(roleName);
     cmd.setRole(roleName);
-    Map<String, String> hostLevelParams = new TreeMap<>();
+    Map<String, String> hostLevelParams = new TreeMap<String, String>();
     hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
-    hostLevelParams.put(PACKAGE_LIST, "[{\"type\":\"tarball\",\"name\":\"" +
-                                      appConf.getGlobalOptions().getMandatoryOption(
-                                          PACKAGE_LIST) + "\"}]");
+    hostLevelParams.put(PACKAGE_LIST, getPackageList());
     hostLevelParams.put(CONTAINER_ID, containerId);
     cmd.setHostLevelParams(hostLevelParams);
 
-    setInstallCommandConfigurations(cmd);
+    setInstallCommandConfigurations(cmd, containerId);
 
     cmd.setCommandParams(setCommandParameters(scriptPath, false));
 
@@ -770,13 +1117,38 @@
     response.addExecutionCommand(cmd);
   }
 
+  private String getPackageList() {
+    String pkgFormatString = "{\"type\":\"%s\",\"name\":\"%s\"}";
+    String pkgListFormatString = "[%s]";
+    List<String> packages = new ArrayList<String>();
+    Application application = getMetainfo().getApplication();
+    if (application != null) {
+      List<OSSpecific> osSpecifics = application.getOSSpecifics();
+      if (osSpecifics != null && osSpecifics.size() > 0) {
+        for (OSSpecific osSpecific : osSpecifics) {
+          if (osSpecific.getOsType().equals("any")) {
+            for (OSPackage osPackage : osSpecific.getPackages()) {
+              packages.add(String.format(pkgFormatString, osPackage.getType(), osPackage.getName()));
+            }
+          }
+        }
+      }
+    }
+
+    if (packages.size() > 0) {
+      return String.format(pkgListFormatString, StringUtils.join(",", packages));
+    } else {
+      return "[]";
+    }
+  }
+
   private void prepareExecutionCommand(ExecutionCommand cmd) {
     cmd.setTaskId(taskId.incrementAndGet());
     cmd.setCommandId(cmd.getTaskId() + "-1");
   }
 
   private Map<String, String> setCommandParameters(String scriptPath, boolean recordConfig) {
-    Map<String, String> cmdParams = new TreeMap<>();
+    Map<String, String> cmdParams = new TreeMap<String, String>();
     cmdParams.put("service_package_folder",
                   "${AGENT_WORK_ROOT}/work/app/definition/package");
     cmdParams.put("script", scriptPath);
@@ -787,9 +1159,9 @@
     return cmdParams;
   }
 
-  private void setInstallCommandConfigurations(ExecutionCommand cmd) throws SliderException {
+  private void setInstallCommandConfigurations(ExecutionCommand cmd, String containerId) throws SliderException {
     ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
-    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
     cmd.setConfigurations(configurations);
   }
 
@@ -798,10 +1170,9 @@
       throws SliderException {
     assert getAmState().isApplicationLive();
     ConfTreeOperations appConf = getAmState().getAppConfSnapshot();
-    ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
 
     StatusCommand cmd = new StatusCommand();
-    String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+    String clusterName = getClusterName();
 
     cmd.setCommandType(AgentCommandType.STATUS_COMMAND);
     cmd.setComponentName(roleName);
@@ -809,14 +1180,14 @@
     cmd.setClusterName(clusterName);
     cmd.setRoleCommand(StatusCommand.STATUS_COMMAND);
 
-    Map<String, String> hostLevelParams = new TreeMap<>();
+    Map<String, String> hostLevelParams = new TreeMap<String, String>();
     hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
     hostLevelParams.put(CONTAINER_ID, containerId);
     cmd.setHostLevelParams(hostLevelParams);
 
     cmd.setCommandParams(setCommandParameters(scriptPath, false));
 
-    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
 
     cmd.setConfigurations(configurations);
 
@@ -827,17 +1198,16 @@
   protected void addGetConfigCommand(String roleName, String containerId, HeartBeatResponse response)
       throws SliderException {
     assert getAmState().isApplicationLive();
-    ConfTreeOperations internalsConf = getAmState().getInternalsSnapshot();
 
     StatusCommand cmd = new StatusCommand();
-    String clusterName = internalsConf.get(OptionKeys.APPLICATION_NAME);
+    String clusterName = getClusterName();
 
     cmd.setCommandType(AgentCommandType.STATUS_COMMAND);
     cmd.setComponentName(roleName);
     cmd.setServiceName(clusterName);
     cmd.setClusterName(clusterName);
     cmd.setRoleCommand(StatusCommand.GET_CONFIG_COMMAND);
-    Map<String, String> hostLevelParams = new TreeMap<>();
+    Map<String, String> hostLevelParams = new TreeMap<String, String>();
     hostLevelParams.put(CONTAINER_ID, containerId);
     cmd.setHostLevelParams(hostLevelParams);
 
@@ -847,7 +1217,8 @@
   }
 
   @VisibleForTesting
-  protected void addStartCommand(String roleName, String containerId, HeartBeatResponse response, String scriptPath)
+  protected void addStartCommand(String roleName, String containerId, HeartBeatResponse response,
+                                 String scriptPath, boolean isMarkedAutoRestart)
       throws
       SliderException {
     assert getAmState().isApplicationLive();
@@ -864,27 +1235,49 @@
     cmd.setServiceName(clusterName);
     cmd.setComponentName(roleName);
     cmd.setRole(roleName);
-    Map<String, String> hostLevelParams = new TreeMap<>();
+    Map<String, String> hostLevelParams = new TreeMap<String, String>();
     hostLevelParams.put(JAVA_HOME, appConf.getGlobalOptions().getMandatoryOption(JAVA_HOME));
     hostLevelParams.put(CONTAINER_ID, containerId);
     cmd.setHostLevelParams(hostLevelParams);
 
+    Map<String, String> roleParams = new TreeMap<String, String>();
+    cmd.setRoleParams(roleParams);
+    cmd.getRoleParams().put("auto_restart", Boolean.toString(isMarkedAutoRestart));
+
     cmd.setCommandParams(setCommandParameters(scriptPath, true));
 
-    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf);
+    Map<String, Map<String, String>> configurations = buildCommandConfigurations(appConf, containerId);
 
     cmd.setConfigurations(configurations);
     response.addExecutionCommand(cmd);
   }
 
   protected Map<String, String> getAllocatedPorts() {
-    return this.allocatedPorts;
+    return getAllocatedPorts(SHARED_PORT_TAG);
   }
 
-  private Map<String, Map<String, String>> buildCommandConfigurations(ConfTreeOperations appConf)
+  protected Map<String, Map<String, String>> getComponentInstanceData() {
+    return this.componentInstanceData;
+  }
+
+  protected Map<String, String> getAllocatedPorts(String containerId) {
+    if (!this.allocatedPorts.containsKey(containerId)) {
+      synchronized (this.allocatedPorts) {
+        if (!this.allocatedPorts.containsKey(containerId)) {
+          this.allocatedPorts.put(containerId,
+              new ConcurrentHashMap<String, String>());
+        }
+      }
+    }
+    return this.allocatedPorts.get(containerId);
+  }
+
+  private Map<String, Map<String, String>> buildCommandConfigurations(
+      ConfTreeOperations appConf, String containerId)
       throws SliderException {
 
-    Map<String, Map<String, String>> configurations = new TreeMap<>();
+    Map<String, Map<String, String>> configurations =
+        new TreeMap<String, Map<String, String>>();
     Map<String, String> tokens = getStandardTokenMap(appConf);
 
     List<String> configs = getApplicationConfigurationTypes(appConf);
@@ -892,14 +1285,14 @@
     //Add global
     for (String configType : configs) {
       addNamedConfiguration(configType, appConf.getGlobalOptions().options,
-                            configurations, tokens);
+                            configurations, tokens, containerId);
     }
 
     return configurations;
   }
 
   private Map<String, String> getStandardTokenMap(ConfTreeOperations appConf) throws SliderException {
-    Map<String, String> tokens = new HashMap<>();
+    Map<String, String> tokens = new HashMap<String, String>();
     String nnuri = appConf.get("site.fs.defaultFS");
     tokens.put("${NN_URI}", nnuri);
     tokens.put("${NN_HOST}", URI.create(nnuri).getHost());
@@ -908,31 +1301,32 @@
     tokens.put("${DEFAULT_DATA_DIR}", getAmState()
         .getInternalsSnapshot()
         .getGlobalOptions()
-        .getMandatoryOption(OptionKeys.INTERNAL_DATA_DIR_PATH));
+        .getMandatoryOption(InternalKeys.INTERNAL_DATA_DIR_PATH));
     return tokens;
   }
 
   private List<String> getApplicationConfigurationTypes(ConfTreeOperations appConf) {
     // for now, reading this from appConf.  In the future, modify this method to
     // process metainfo.xml
-    List<String> configList = new ArrayList<>();
+    List<String> configList = new ArrayList<String>();
     configList.add(GLOBAL_CONFIG_TAG);
 
     String configTypes = appConf.get("config_types");
-    String[] configs = configTypes.split(",");
-
-    configList.addAll(Arrays.asList(configs));
+    if (configTypes != null && configTypes.length() > 0) {
+      String[] configs = configTypes.split(",");
+      configList.addAll(Arrays.asList(configs));
+    }
 
     // remove duplicates.  mostly worried about 'global' being listed
-    return new ArrayList<>(new HashSet<>(configList));
+    return new ArrayList<String>(new HashSet<String>(configList));
   }
 
   private void addNamedConfiguration(String configName, Map<String, String> sourceConfig,
                                      Map<String, Map<String, String>> configurations,
-                                     Map<String, String> tokens) {
-    Map<String, String> config = new HashMap<>();
+                                     Map<String, String> tokens, String containerId) {
+    Map<String, String> config = new HashMap<String, String>();
     if (configName.equals(GLOBAL_CONFIG_TAG)) {
-      addDefaultGlobalConfig(config);
+      addDefaultGlobalConfig(config, containerId);
     }
     // add role hosts to tokens
     addRoleRelatedTokens(tokens);
@@ -941,8 +1335,18 @@
     //apply any port updates
     if (!this.getAllocatedPorts().isEmpty()) {
       for (String key : config.keySet()) {
-        if (this.getAllocatedPorts().containsKey(key)) {
-          config.put(key, getAllocatedPorts().get(key));
+        String value = config.get(key);
+        String lookupKey = configName + "." + key;
+        if(!value.contains(DO_NOT_PROPAGATE_TAG)) {
+          // If the config property is shared then pass on the already allocated value
+          // from any container
+          if (this.getAllocatedPorts().containsKey(lookupKey)) {
+            config.put(key, getAllocatedPorts().get(lookupKey));
+          }
+        } else {
+          if (this.getAllocatedPorts(containerId).containsKey(lookupKey)) {
+            config.put(key, getAllocatedPorts(containerId).get(lookupKey));
+          }
         }
       }
     }
@@ -959,7 +1363,7 @@
 
   private Iterable<String> getHostsList(Collection<ClusterNode> values,
                                         boolean hostOnly) {
-    List<String> hosts = new ArrayList<>();
+    List<String> hosts = new ArrayList<String>();
     for (ClusterNode cn : values) {
       hosts.add(hostOnly ? cn.host : cn.host + "/" + cn.name);
     }
@@ -967,17 +1371,11 @@
     return hosts;
   }
 
-  private void addDefaultGlobalConfig(Map<String, String> config) {
-    config.put("app_log_dir", "${AGENT_LOG_ROOT}/app/log");
+  private void addDefaultGlobalConfig(Map<String, String> config, String containerId) {
+    config.put("app_log_dir", "${AGENT_LOG_ROOT}");
     config.put("app_pid_dir", "${AGENT_WORK_ROOT}/app/run");
     config.put("app_install_dir", "${AGENT_WORK_ROOT}/app/install");
-  }
-
-  @Override
-  public Map<String, String> buildMonitorDetails(ClusterDescription clusterDesc) {
-    Map<String, String> details = super.buildMonitorDetails(clusterDesc);
-    buildRoleHostDetails(details);
-    return details;
+    config.put("app_container_id", containerId);
   }
 
   private void buildRoleHostDetails(Map<String, String> details) {
@@ -988,25 +1386,4 @@
                   "");
     }
   }
-
-  @Override
-  public void applyInitialRegistryDefinitions(URL unsecureWebAPI,
-                                              URL secureWebAPI,
-                                              ServiceInstanceData instanceData) throws IOException {
-    super.applyInitialRegistryDefinitions(unsecureWebAPI,
-                                          secureWebAPI,
-                                          instanceData
-    );
-
-    try {
-      instanceData.internalView.endpoints.put(
-          CustomRegistryConstants.AGENT_REST_API,
-          new RegisteredEndpoint(
-              new URL(secureWebAPI, SLIDER_PATH_AGENTS),
-              "Agent REST API"));
-    } catch (URISyntaxException e) {
-      throw new IOException(e);
-    }
-
-  }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
index 0dce4bb..f4ace5f 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentCommandOrder.java
@@ -37,7 +37,8 @@
   public static final Logger log =
       LoggerFactory.getLogger(ComponentCommandOrder.class);
   private static char SPLIT_CHAR = '-';
-  Map<Command, Map<String, List<ComponentState>>> dependencies = new HashMap<>();
+  Map<Command, Map<String, List<ComponentState>>> dependencies =
+      new HashMap<Command, Map<String, List<ComponentState>>>();
 
   public ComponentCommandOrder(List<CommandOrder> commandOrders) {
     if (commandOrders != null && commandOrders.size() > 0) {
@@ -48,13 +49,13 @@
         if (requiredStates.size() > 0) {
           Map<String, List<ComponentState>> compDep = dependencies.get(componentCmd.command);
           if (compDep == null) {
-            compDep = new HashMap<>();
+            compDep = new HashMap<String, List<ComponentState>>();
             dependencies.put(componentCmd.command, compDep);
           }
 
           List<ComponentState> requirements = compDep.get(componentCmd.componentName);
           if (requirements == null) {
-            requirements = new ArrayList<>();
+            requirements = new ArrayList<ComponentState>();
             compDep.put(componentCmd.componentName, requirements);
           }
 
@@ -70,7 +71,7 @@
     }
 
     String[] componentStates = requires.split(",");
-    List<ComponentState> retList = new ArrayList<>();
+    List<ComponentState> retList = new ArrayList<ComponentState>();
     for (String componentStateStr : componentStates) {
       retList.add(getComponentState(componentStateStr));
     }
@@ -126,18 +127,18 @@
       for (ComponentState stateToMatch : required) {
         for (ComponentInstanceState currState : currentStates) {
           log.debug("Checking schedule {} {} against dependency {} is {}",
-                    component, command, currState.getCompName(), currState.getState());
-          if (currState.getCompName().equals(stateToMatch.componentName)) {
+                    component, command, currState.getComponentName(), currState.getState());
+          if (currState.getComponentName().equals(stateToMatch.componentName)) {
             if (currState.getState() != stateToMatch.state) {
               if (stateToMatch.state == State.STARTED) {
                 log.info("Cannot schedule {} {} as dependency {} is {}",
-                         component, command, currState.getCompName(), currState.getState());
+                         component, command, currState.getComponentName(), currState.getState());
                 canExecute = false;
               } else {
                 //state is INSTALLED
                 if (currState.getState() != State.STARTING && currState.getState() != State.STARTED) {
                   log.info("Cannot schedule {} {} as dependency {} is {}",
-                           component, command, currState.getCompName(), currState.getState());
+                           component, command, currState.getComponentName(), currState.getState());
                   canExecute = false;
                 }
               }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
index 60a6f82..f7f8bf4 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/ComponentInstanceState.java
@@ -19,6 +19,8 @@
 package org.apache.slider.providers.agent;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -30,8 +32,9 @@
   private static String INVALID_TRANSITION_ERROR =
       "Result {0} for command {1} is not expected for component {2} in state {3}.";
 
-  private final String compName;
-  private final String containerId;
+  private final String componentName;
+  private final ContainerId containerId;
+  private final String containerIdAsString;
   private final String applicationId;
   private State state = State.INIT;
   private State targetState = State.STARTED;
@@ -40,18 +43,19 @@
   private long lastHeartbeat = 0;
   private ContainerState containerState;
 
-  public ComponentInstanceState(String compName,
-                                String containerId,
-                                String applicationId) {
-    this.compName = compName;
+  public ComponentInstanceState(String componentName,
+      ContainerId containerId,
+      String applicationId) {
+    this.componentName = componentName;
     this.containerId = containerId;
+    this.containerIdAsString = containerId.toString();
     this.applicationId = applicationId;
     this.containerState = ContainerState.INIT;
     this.lastHeartbeat = System.currentTimeMillis();
   }
 
-  public String getCompName() {
-    return compName;
+  public String getComponentName() {
+    return componentName;
   }
 
   public Boolean getConfigReported() {
@@ -74,20 +78,32 @@
     return lastHeartbeat;
   }
 
-  public void setLastHeartbeat(long lastHeartbeat) {
-    this.lastHeartbeat = lastHeartbeat;
-    if(this.containerState == ContainerState.UNHEALTHY ||
-       this.containerState == ContainerState.INIT) {
-      this.containerState = ContainerState.HEALTHY;
+  /**
+   * Update the heartbeat, and change container state
+   * to mark as healthy if appropriate
+   * @param heartbeatTime last time the heartbeat was seen
+   * @return the current container state
+   */
+  public ContainerState heartbeat(long heartbeatTime) {
+    this.lastHeartbeat = heartbeatTime;
+    if(containerState == ContainerState.UNHEALTHY ||
+       containerState == ContainerState.INIT) {
+      containerState = ContainerState.HEALTHY;
     }
+    return containerState;
+  }
+  
+
+  public ContainerId getContainerId() {
+    return containerId;
   }
 
   public void commandIssued(Command command) {
     Command expected = getNextCommand();
     if (expected != command) {
-      throw new IllegalArgumentException("Command " + command + " is not allowed is state " + state);
+      throw new IllegalArgumentException("Command " + command + " is not allowed in state " + state);
     }
-    this.state = this.state.getNextState(command);
+    state = state.getNextState(command);
   }
 
   public void applyCommandResult(CommandResult result, Command command) {
@@ -101,12 +117,12 @@
       } else if (result == CommandResult.COMPLETED) {
         failuresSeen = 0;
       }
-      this.state = this.state.getNextState(result);
+      state = state.getNextState(result);
     } catch (IllegalArgumentException e) {
       String message = String.format(INVALID_TRANSITION_ERROR,
                                      result.toString(),
                                      command.toString(),
-                                     compName,
+                                     componentName,
                                      state.toString());
       log.warn(message);
       throw new IllegalStateException(message);
@@ -114,8 +130,8 @@
   }
 
   public boolean hasPendingCommand() {
-    if (this.state.canIssueCommands() &&
-        this.state != this.targetState &&
+    if (state.canIssueCommands() &&
+        state != targetState &&
         failuresSeen < MAX_FAILURE_TOLERATED) {
       return true;
     }
@@ -144,8 +160,8 @@
   public int hashCode() {
     int hashCode = 1;
 
-    hashCode = hashCode ^ (compName != null ? compName.hashCode() : 0);
-    hashCode = hashCode ^ (containerId != null ? containerId.hashCode() : 0);
+    hashCode = hashCode ^ (componentName != null ? componentName.hashCode() : 0);
+    hashCode = hashCode ^ (containerIdAsString != null ? containerIdAsString.hashCode() : 0);
     hashCode = hashCode ^ (applicationId != null ? applicationId.hashCode() : 0);
     return hashCode;
   }
@@ -158,13 +174,13 @@
 
     ComponentInstanceState that = (ComponentInstanceState) o;
 
-    if (this.compName != null ?
-        !this.compName.equals(that.compName) : this.compName != null) {
+    if (this.componentName != null ?
+        !this.componentName.equals(that.componentName) : that.componentName != null) {
       return false;
     }
 
-    if (this.containerId != null ?
-        !this.containerId.equals(that.containerId) : this.containerId != null) {
+    if (this.containerIdAsString != null ?
+        !this.containerIdAsString.equals(that.containerIdAsString) : that.containerIdAsString != null) {
       return false;
     }
 
@@ -175,4 +191,18 @@
 
     return true;
   }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb =
+        new StringBuilder("ComponentInstanceState{");
+    sb.append("containerIdAsString='").append(containerIdAsString).append('\'');
+    sb.append(", state=").append(state);
+    sb.append(", failuresSeen=").append(failuresSeen);
+    sb.append(", lastHeartbeat=").append(lastHeartbeat);
+    sb.append(", containerState=").append(containerState);
+    sb.append(", componentName='").append(componentName).append('\'');
+    sb.append('}');
+    return sb.toString();
+  }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
index 3aeff66..0a1beca 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
@@ -17,6 +17,8 @@
  */
 package org.apache.slider.providers.agent;
 
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -53,10 +55,7 @@
   }
 
   public boolean isAlive() {
-    if (monitorThread != null) {
-      return monitorThread.isAlive();
-    }
-    return false;
+    return monitorThread != null && monitorThread.isAlive();
   }
 
   @Override
@@ -66,7 +65,7 @@
         log.debug("Putting monitor to sleep for " + threadWakeupInterval + " " +
                   "milliseconds");
         Thread.sleep(threadWakeupInterval);
-        doWork();
+        doWork(System.currentTimeMillis());
       } catch (InterruptedException ex) {
         log.warn("Scheduler thread is interrupted going to stop", ex);
         shouldRun = false;
@@ -83,32 +82,46 @@
    * received in last check interval they are marked as UNHEALTHY. INIT is when the agent is started but it did not
    * communicate at all. HEALTHY being the AM has received heartbeats. After an interval as UNHEALTHY the container is
    * declared unavailable
+   * @param now current time in milliseconds ... tests can set this explicitly
    */
-  private void doWork() {
+  @VisibleForTesting
+  public void doWork(long now) {
     Map<String, ComponentInstanceState> componentStatuses = provider.getComponentStatuses();
     if (componentStatuses != null) {
       for (String containerLabel : componentStatuses.keySet()) {
         ComponentInstanceState componentInstanceState = componentStatuses.get(containerLabel);
-        long timeSinceLastHeartbeat = System.currentTimeMillis() - componentInstanceState.getLastHeartbeat();
+        long timeSinceLastHeartbeat = now - componentInstanceState.getLastHeartbeat();
 
         if (timeSinceLastHeartbeat > threadWakeupInterval) {
-          if (componentInstanceState.getContainerState() == ContainerState.HEALTHY ||
-              componentInstanceState.getContainerState() == ContainerState.INIT) {
-            componentInstanceState.setContainerState(ContainerState.UNHEALTHY);
-            log.warn("Component {} marked UNHEALTHY. Last heartbeat received at {} approx. {} ms. back.",
-                     containerLabel, componentInstanceState.getLastHeartbeat(),
-                     timeSinceLastHeartbeat);
-            continue;
+          switch (componentInstanceState.getContainerState()) {
+            case INIT:
+            case HEALTHY:
+              componentInstanceState.setContainerState(ContainerState.UNHEALTHY);
+              log.warn(
+                  "Component {} marked UNHEALTHY. Last heartbeat received at {} approx. {} ms. back.",
+                  componentInstanceState,
+                  componentInstanceState.getLastHeartbeat(),
+                  timeSinceLastHeartbeat);
+              break;
+            case UNHEALTHY:
+              if (timeSinceLastHeartbeat > threadWakeupInterval * 2) {
+                componentInstanceState.setContainerState(
+                    ContainerState.HEARTBEAT_LOST);
+                log.warn(
+                    "Component {} marked HEARTBEAT_LOST. Last heartbeat received at {} approx. {} ms. back.",
+                    componentInstanceState, componentInstanceState.getLastHeartbeat(),
+                    timeSinceLastHeartbeat);
+                ContainerId containerId =
+                    componentInstanceState.getContainerId();
+                provider.lostContainer(containerLabel, containerId);
+              }
+              break;
+            case HEARTBEAT_LOST:
+              // unexpected case
+              log.warn("Heartbeat from lost component: {}", componentInstanceState);
+              break;
           }
-          if (componentInstanceState.getContainerState() == ContainerState.UNHEALTHY
-              && timeSinceLastHeartbeat > threadWakeupInterval * 2) {
-            componentInstanceState.setContainerState(ContainerState.HEARTBEAT_LOST);
-            log.warn("Component {} marked HEARTBEAT_LOST. Last heartbeat received at {} approx. {} ms. back.",
-                     containerLabel, componentInstanceState.getLastHeartbeat(),
-                     timeSinceLastHeartbeat);
-            this.provider.releaseContainer(containerLabel);
-            continue;
-          }
+            
         }
       }
     }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
index b007313..d994e33 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Application.java
@@ -26,6 +26,7 @@
   String name;
   String comment;
   String version;
+  String exportedConfigs;
   List<Component> components;
   List<ExportGroup> exportGroups;
   List<OSSpecific> osSpecifics;
@@ -33,10 +34,10 @@
   ConfigurationDependencies configDependencies;
 
   public Application() {
-    exportGroups = new ArrayList<>();
-    components = new ArrayList<>();
-    osSpecifics = new ArrayList<>();
-    commandOrders = new ArrayList<>();
+    exportGroups = new ArrayList<ExportGroup>();
+    components = new ArrayList<Component>();
+    osSpecifics = new ArrayList<OSSpecific>();
+    commandOrders = new ArrayList<CommandOrder>();
   }
 
   public String getName() {
@@ -63,6 +64,14 @@
     this.version = version;
   }
 
+  public String getExportedConfigs() {
+    return exportedConfigs;
+  }
+
+  public void setExportedConfigs(String exportedConfigs) {
+    this.exportedConfigs = exportedConfigs;
+  }
+
   public ConfigurationDependencies getConfigDependencies() {
     return configDependencies;
   }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
index 6cd08e0..03c64d4 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/Component.java
@@ -16,6 +16,9 @@
  */
 package org.apache.slider.providers.agent.application.metadata;
 
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  *
  */
@@ -25,10 +28,14 @@
   String publishConfig;
   String minInstanceCount;
   String maxInstanceCount;
+  String autoStartOnFailure;
+  String appExports;
   CommandScript commandScript;
+  List<ComponentExport> componentExports;
 
   public Component() {
     publishConfig = Boolean.FALSE.toString();
+    componentExports = new ArrayList<ComponentExport>();
   }
 
   public String getName() {
@@ -55,6 +62,22 @@
     this.publishConfig = publishConfig;
   }
 
+  public String getAutoStartOnFailure() {
+    return autoStartOnFailure;
+  }
+
+  public void setAutoStartOnFailure(String autoStartOnFailure) {
+    this.autoStartOnFailure = autoStartOnFailure;
+  }
+
+  public String getAppExports() {
+    return appExports;
+  }
+
+  public void setAppExports(String appExports) {
+    this.appExports = appExports;
+  }
+
   public String getMinInstanceCount() {
     return minInstanceCount;
   }
@@ -79,6 +102,18 @@
     this.commandScript = commandScript;
   }
 
+  public void addComponentExport(ComponentExport export) {
+    componentExports.add(export);
+  }
+
+  public List<ComponentExport> getComponentExports() {
+    return componentExports;
+  }
+
+  public Boolean getRequiresAutoRestart() {
+    return Boolean.parseBoolean(this.autoStartOnFailure);
+  }
+
   @Override
   public String toString() {
     final StringBuilder sb =
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java
new file mode 100644
index 0000000..a18854c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/ComponentExport.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.slider.providers.agent.application.metadata;
+
+/**
+ *
+ */
+public class ComponentExport {
+  String name;
+  String value;
+
+  public ComponentExport() {
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  public String getValue() {
+    return value;
+  }
+
+  public void setValue(String value) {
+    this.value = value;
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb =
+        new StringBuilder("{");
+    sb.append("\n\"name\": ").append(name);
+    sb.append(",\n\"value\": ").append(value);
+    sb.append('}');
+    return sb.toString();
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
index c7922a7..bc93d6f 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/application/metadata/MetainfoParser.java
@@ -38,6 +38,7 @@
     digester.addBeanPropertySetter("*/application/name");
     digester.addBeanPropertySetter("*/application/comment");
     digester.addBeanPropertySetter("*/application/version");
+    digester.addBeanPropertySetter("*/application/exportedConfigs");
 
     digester.addObjectCreate("*/commandOrder", CommandOrder.class);
     digester.addBeanPropertySetter("*/commandOrder/command");
@@ -58,6 +59,12 @@
     digester.addBeanPropertySetter("*/component/publishConfig");
     digester.addBeanPropertySetter("*/component/minInstanceCount");
     digester.addBeanPropertySetter("*/component/maxInstanceCount");
+    digester.addBeanPropertySetter("*/component/autoStartOnFailure");
+    digester.addBeanPropertySetter("*/component/appExports");
+    digester.addObjectCreate("*/componentExport", ComponentExport.class);
+    digester.addBeanPropertySetter("*/componentExport/name");
+    digester.addBeanPropertySetter("*/componentExport/value");
+    digester.addSetNext("*/componentExport", "addComponentExport");
     digester.addSetNext("*/component", "addComponent");
 
     digester.addObjectCreate("*/commandScript", CommandScript.class);
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
index 6aeb801..dc84f02 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMClientProvider.java
@@ -19,6 +19,7 @@
 package org.apache.slider.providers.slideram;
 
 import com.beust.jcommander.JCommander;
+import com.codahale.metrics.MetricRegistry;
 import com.google.gson.GsonBuilder;
 import org.apache.curator.CuratorZookeeperClient;
 import org.apache.curator.framework.CuratorFramework;
@@ -28,7 +29,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.RoleKeys;
 import org.apache.slider.common.SliderKeys;
@@ -59,8 +60,8 @@
  * This keeps aspects of role, cluster validation and Clusterspec setup
  * out of the core slider client
  */
-public class SliderAMClientProvider extends AbstractClientProvider implements
-    SliderKeys {
+public class SliderAMClientProvider extends AbstractClientProvider
+    implements SliderKeys {
 
 
   protected static final Logger log =
@@ -83,7 +84,7 @@
    * List of roles
    */
   public static final List<ProviderRole> ROLES =
-    new ArrayList<>();
+    new ArrayList<ProviderRole>();
 
   public static final int KEY_AM = ROLE_AM_PRIORITY_INDEX;
 
@@ -113,9 +114,8 @@
                                                     AggregateConf instanceDefinition,
                                                     Path clusterDirPath,
                                                     Path generatedConfDirPath,
-                                                    boolean secure) throws
-      SliderException,
-                                                                    IOException {
+                                                    boolean secure)
+      throws SliderException, IOException {
 
     super.preflightValidateClusterConfiguration(sliderFileSystem, clustername, configuration, instanceDefinition, clusterDirPath, generatedConfDirPath, secure);
     //add a check for the directory being writeable by the current user
@@ -123,7 +123,7 @@
       dataPath = instanceDefinition.getInternalOperations()
                                    .getGlobalOptions()
                                    .getMandatoryOption(
-                                     OptionKeys.INTERNAL_DATA_DIR_PATH);
+                                     InternalKeys.INTERNAL_DATA_DIR_PATH);
 
     Path path = new Path(dataPath);
     sliderFileSystem.verifyDirectoryWriteAccess(path);
@@ -147,7 +147,7 @@
     throws IOException, SliderException {
 
     Map<String, LocalResource> providerResources =
-        new HashMap<>();
+        new HashMap<String, LocalResource>();
 
 
     ProviderUtils.addProviderJar(providerResources,
@@ -167,7 +167,7 @@
       CuratorZookeeperClient.class,
       ServiceInstance.class,
       ServiceNames.class,
-
+      MetricRegistry.class
     };
     String[] jars =
       {
@@ -179,6 +179,7 @@
         "curator-client.jar",
         "curator-x-discovery.jar",
         "curator-x-discovery-service.jar",
+        "metrics-core.jar"
       };
     ProviderUtils.addDependencyJars(providerResources, fileSystem, tempPath,
                                     libdir, jars,
@@ -209,11 +210,11 @@
    * add them to the command line
    */
   public void addJVMOptions(AggregateConf aggregateConf,
-                            JavaCommandLineBuilder cmdLine) throws
-                                                        BadConfigException {
-    
+                            JavaCommandLineBuilder cmdLine)
+      throws BadConfigException {
+
     MapOperations sliderAM =
-      aggregateConf.getAppConfOperations().getMandatoryComponent(
+        aggregateConf.getAppConfOperations().getMandatoryComponent(
         SliderKeys.COMPONENT_AM);
     cmdLine.forceIPv4().headless();
     String heap = sliderAM.getOption(RoleKeys.JVM_HEAP,
@@ -227,11 +228,10 @@
 
 
   @Override
-  public void prepareInstanceConfiguration(AggregateConf aggregateConf) throws
-      SliderException,
-                                                                        IOException {
+  public void prepareInstanceConfiguration(AggregateConf aggregateConf)
+      throws SliderException, IOException {
     mergeTemplates(aggregateConf,
-                   INTERNAL_JSON, RESOURCES_JSON, APPCONF_JSON
+        INTERNAL_JSON, RESOURCES_JSON, APPCONF_JSON
                   );
   }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
index 184c25a..071fc19 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
@@ -100,7 +100,7 @@
 
   @Override
   public List<ProviderRole> getRoles() {
-    return new ArrayList<>(0);
+    return new ArrayList<ProviderRole>(0);
   }
 
   @Override
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java
deleted file mode 100644
index 50c99f3..0000000
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/ContainerStartOperation.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.server.appmaster;
-
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.slider.server.appmaster.state.RoleInstance;
-
-/**
- * Callback for container start requests
- */
-public interface ContainerStartOperation {
-  /**
-   * Add a node to the list of starting
-   * nodes then trigger the NM start operation with the given
-   * launch context
-   * @param container container
-   * @param ctx context
-   * @param instance node details
-   */
-  void startContainer(Container container,
-                      ContainerLaunchContext ctx,
-                      RoleInstance instance) ;
-}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
index 5a5baaa..e8b6802 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/RoleLaunchService.java
@@ -28,32 +28,42 @@
 import org.apache.slider.core.launch.ContainerLauncher;
 import org.apache.slider.providers.ProviderRole;
 import org.apache.slider.providers.ProviderService;
+import org.apache.slider.server.appmaster.actions.ActionStartContainer;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
 import org.apache.slider.server.appmaster.state.RoleInstance;
 import org.apache.slider.server.appmaster.state.RoleStatus;
-import org.apache.slider.server.services.workflow.AbstractWorkflowExecutorService;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
 import org.apache.slider.server.services.workflow.ServiceThreadFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
 /**
  * A service for launching containers
  */
-public class RoleLaunchService extends AbstractWorkflowExecutorService {
+public class RoleLaunchService
+    extends WorkflowExecutorService<ExecutorService> {
   protected static final Logger log =
     LoggerFactory.getLogger(RoleLaunchService.class);
 
   public static final String ROLE_LAUNCH_SERVICE = "RoleLaunchService";
 
-  /**
-   * Callback to whatever has the task of actually running the container
-   * start operation
-   */
-  private final ContainerStartOperation containerStarter;
 
+  /**
+   * Queue submission API
+   */
+  private final QueueAccess actionQueue;
+
+  /**
+   * Provider building up the command
+   */
   private final ProviderService provider;
+  
   /**
    * Filesystem to use for the launch
    */
@@ -75,20 +85,21 @@
   /**
    * Construct an instance of the launcher
    * @param startOperation the callback to start the opreation
+   * @param queueAccess access to the action queue for submitting container start actions
    * @param provider the provider
    * @param fs filesystem
    * @param generatedConfDirPath path in the FS for the generated dir
    * @param envVars environment variables
    * @param launcherTmpDirPath path for a temporary data in the launch process
    */
-  public RoleLaunchService(ContainerStartOperation startOperation,
-                           ProviderService provider,
-                           SliderFileSystem fs,
-                           Path generatedConfDirPath,
-                           Map<String, String> envVars,
+  public RoleLaunchService(QueueAccess queueAccess,
+      ProviderService provider,
+      SliderFileSystem fs,
+      Path generatedConfDirPath,
+      Map<String, String> envVars,
       Path launcherTmpDirPath) {
     super(ROLE_LAUNCH_SERVICE);
-    containerStarter = startOperation;
+    this.actionQueue = queueAccess;
     this.fs = fs;
     this.generatedConfDirPath = generatedConfDirPath;
     this.launcherTmpDirPath = launcherTmpDirPath;
@@ -203,9 +214,10 @@
         instance.role = containerRole;
         instance.roleId = role.id;
         instance.environment = envDescription;
-        containerStarter.startContainer(container,
-                                        containerLauncher.completeContainerLaunch(),
-                                        instance);
+        actionQueue.put(new ActionStartContainer("starting " + containerRole,
+            0, container,
+            containerLauncher.completeContainerLaunch(),
+            instance));
       } catch (Exception e) {
         log.error("Exception thrown while trying to start {}: {}",
             containerRole, e);
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index 0b22910..93adfb2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -18,6 +18,8 @@
 
 package org.apache.slider.server.appmaster;
 
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.BlockingService;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -55,7 +57,7 @@
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.webapp.WebApps;
 import org.apache.slider.api.ClusterDescription;
-import org.apache.slider.api.OptionKeys;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.RoleKeys;
 import org.apache.slider.api.SliderClusterProtocol;
@@ -75,12 +77,13 @@
 import org.apache.slider.core.build.InstanceIO;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.conf.ConfTree;
+import org.apache.slider.core.conf.ConfTreeOperations;
 import org.apache.slider.core.conf.MapOperations;
 import org.apache.slider.core.exceptions.BadConfigException;
 import org.apache.slider.core.exceptions.SliderException;
 import org.apache.slider.core.exceptions.SliderInternalStateException;
 import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
-import org.apache.slider.core.main.LauncherExitCodes;
+import org.apache.slider.core.main.ExitCodeProvider;
 import org.apache.slider.core.main.RunService;
 import org.apache.slider.core.main.ServiceLauncher;
 import org.apache.slider.core.persist.ConfTreeSerDeser;
@@ -94,17 +97,32 @@
 import org.apache.slider.providers.SliderProviderFactory;
 import org.apache.slider.providers.slideram.SliderAMClientProvider;
 import org.apache.slider.providers.slideram.SliderAMProviderService;
+import org.apache.slider.server.appmaster.actions.ActionKillContainer;
+import org.apache.slider.server.appmaster.actions.RegisterComponentInstance;
+import org.apache.slider.server.appmaster.actions.QueueExecutor;
+import org.apache.slider.server.appmaster.actions.ActionHalt;
+import org.apache.slider.server.appmaster.actions.QueueService;
+import org.apache.slider.server.appmaster.actions.ActionStopSlider;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.RenewingAction;
+import org.apache.slider.server.appmaster.actions.ResetFailureWindow;
+import org.apache.slider.server.appmaster.actions.UnregisterComponentInstance;
+import org.apache.slider.server.appmaster.monkey.ChaosKillAM;
+import org.apache.slider.server.appmaster.monkey.ChaosKillContainer;
+import org.apache.slider.server.appmaster.monkey.ChaosMonkeyService;
+import org.apache.slider.server.appmaster.operations.AsyncRMOperationHandler;
+import org.apache.slider.server.appmaster.operations.ProviderNotifyingOperationHandler;
 import org.apache.slider.server.appmaster.rpc.RpcBinder;
 import org.apache.slider.server.appmaster.rpc.SliderAMPolicyProvider;
 import org.apache.slider.server.appmaster.rpc.SliderClusterProtocolPBImpl;
-import org.apache.slider.server.appmaster.state.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
 import org.apache.slider.server.appmaster.state.AppState;
 import org.apache.slider.server.appmaster.state.ContainerAssignment;
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation;
 import org.apache.slider.server.appmaster.state.ProviderAppState;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
 import org.apache.slider.server.appmaster.state.RoleInstance;
 import org.apache.slider.server.appmaster.state.RoleStatus;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
 import org.apache.slider.server.appmaster.web.AgentService;
 import org.apache.slider.server.appmaster.web.rest.agent.AgentWebApp;
 import org.apache.slider.server.appmaster.web.SliderAMWebApp;
@@ -117,6 +135,8 @@
 import org.apache.slider.server.services.security.CertificateManager;
 import org.apache.slider.server.services.utility.AbstractSliderLaunchedService;
 import org.apache.slider.server.services.utility.WebAppService;
+import org.apache.slider.server.services.workflow.ServiceThreadFactory;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
 import org.apache.slider.server.services.workflow.WorkflowRpcService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -132,9 +152,11 @@
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
@@ -154,9 +176,7 @@
     SliderClusterProtocol,
     ServiceStateChangeListener,
     RoleKeys,
-    ProviderCompleted,
-    ContainerStartOperation,
-    AMViewForProviders {
+    ProviderCompleted {
   protected static final Logger log =
     LoggerFactory.getLogger(SliderAppMaster.class);
 
@@ -179,7 +199,13 @@
 
   public static final int HEARTBEAT_INTERVAL = 1000;
   public static final int NUM_RPC_HANDLERS = 5;
+  public static final int SCHEDULED_EXECUTOR_POOL_SIZE = 1;
 
+  /**
+   * Singleton of metrics registry
+   */
+  public static final MetricRegistry metrics = new MetricRegistry();
+  
   /** YARN RPC to communicate with the Resource Manager or Node Manager */
   private YarnRPC yarnRPC;
 
@@ -190,6 +216,8 @@
   @SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
 
   private RMOperationHandler rmOperationHandler;
+  
+  private RMOperationHandler providerRMOperationHandler;
 
   /** Handle to communicate with the Node Manager*/
   @SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
@@ -277,6 +305,11 @@
   private ContainerId appMasterContainerID;
 
   /**
+   * Monkey Service -may be null
+   */
+  private ChaosMonkeyService monkey;
+  
+  /**
    * ProviderService of this cluster
    */
   @SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
@@ -293,7 +326,6 @@
    */
   private int containerMaxCores;
 
-
   /**
    * limit container memory
    */
@@ -315,6 +347,10 @@
   private String agentAccessUrl;
   private CertificateManager certificateManager;
 
+  private WorkflowExecutorService<ExecutorService> executorService;
+  
+  private final QueueService actionQueues = new QueueService();
+  
   /**
    * Service Constructor
    */
@@ -322,9 +358,7 @@
     super(SERVICE_CLASSNAME_SHORT);
   }
 
-
-
- /* =================================================================== */
+/* =================================================================== */
 /* service lifecycle methods */
 /* =================================================================== */
 
@@ -340,7 +374,7 @@
     SliderAMCreateAction createAction = (SliderAMCreateAction) action;
     //sort out the location of the AM
     serviceArgs.applyDefinitions(conf);
-    serviceArgs.applyFileSystemURL(conf);
+    serviceArgs.applyFileSystemBinding(conf);
 
     String rmAddress = createAction.getRmAddress();
     if (rmAddress != null) {
@@ -348,7 +382,7 @@
       SliderUtils.setRmSchedulerAddress(conf, rmAddress);
     }
     serviceArgs.applyDefinitions(conf);
-    serviceArgs.applyFileSystemURL(conf);
+    serviceArgs.applyFileSystemBinding(conf);
     //init security with our conf
     if (SliderUtils.isHadoopClusterSecure(conf)) {
       log.info("Secure mode with kerberos realm {}",
@@ -368,11 +402,25 @@
     //look at settings of Hadoop Auth, to pick up a problem seen once
     checkAndWarnForAuthTokenProblems();
 
+    executorService = new WorkflowExecutorService<ExecutorService>("AmExecutor",
+        Executors.newCachedThreadPool(
+        new ServiceThreadFactory("AmExecutor", true)));
+    addService(executorService);
+
+
+    addService(actionQueues);
     //init all child services
     super.serviceInit(conf);
   }
+
+  @Override
+  protected void serviceStart() throws Exception {
+    super.serviceStart();
+    executorService.execute(new QueueExecutor(this, actionQueues));
+    executorService.execute(actionQueues);
+  }
   
-/* =================================================================== */
+  /* =================================================================== */
 /* RunService methods called from ServiceLauncher */
 /* =================================================================== */
 
@@ -412,7 +460,8 @@
     String action = serviceArgs.getAction();
     List<String> actionArgs = serviceArgs.getActionArgs();
     int exitCode;
-    switch (action) {
+/*  JDK7
+  switch (action) {
       case SliderActions.ACTION_HELP:
         log.info(getName() + serviceArgs.usage());
         exitCode = LauncherExitCodes.EXIT_USAGE;
@@ -423,6 +472,15 @@
       default:
         throw new SliderException("Unimplemented: " + action);
     }
+    */
+    if (action.equals(SliderActions.ACTION_HELP)) {
+      log.info(getName() + serviceArgs.usage());
+      exitCode = SliderExitCodes.EXIT_USAGE;
+    } else if (action.equals(SliderActions.ACTION_CREATE)) {
+      exitCode = createAndRunCluster(actionArgs.get(0));
+    } else {
+      throw new SliderException("Unimplemented: " + action);
+    }
     log.info("Exiting AM; final exit code = {}", exitCode);
     return exitCode;
   }
@@ -483,10 +541,9 @@
     serviceConf.set(SliderAmIpFilter.WS_CONTEXT_ROOT, WS_CONTEXT_ROOT + "|" + WS_AGENT_CONTEXT_ROOT);
     
     //get our provider
-    MapOperations globalInternalOptions =
-      instanceDefinition.getInternalOperations().getGlobalOptions();
+    MapOperations globalInternalOptions = getGlobalInternalOptions();
     String providerType = globalInternalOptions.getMandatoryOption(
-      OptionKeys.INTERNAL_PROVIDER_NAME);
+      InternalKeys.INTERNAL_PROVIDER_NAME);
     log.info("Cluster provider type is {}", providerType);
     SliderProviderFactory factory =
       SliderProviderFactory.createSliderProviderFactory(
@@ -494,6 +551,9 @@
     providerService = factory.createServerProvider();
     // init the provider BUT DO NOT START IT YET
     initAndAddService(providerService);
+    providerRMOperationHandler =
+        new ProviderNotifyingOperationHandler(providerService);
+    
     // create a slider AM provider
     sliderAMProvider = new SliderAMProviderService();
     initAndAddService(sliderAMProvider);
@@ -553,7 +613,7 @@
     }
 
     Map<String, String> envVars;
-
+    List<Container> liveContainers;
     /**
      * It is critical this section is synchronized, to stop async AM events
      * arriving while registering a restarting AM.
@@ -593,7 +653,7 @@
 
       //build the role map
       List<ProviderRole> providerRoles =
-        new ArrayList<>(providerService.getRoles());
+        new ArrayList<ProviderRole>(providerService.getRoles());
       providerRoles.addAll(SliderAMClientProvider.ROLES);
 
       // Start up the WebApp and track the URL for it
@@ -615,7 +675,7 @@
                       .start(webApp);
       appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port();
       WebAppService<SliderAMWebApp> webAppService =
-        new WebAppService<>("slider", webApp);
+        new WebAppService<SliderAMWebApp>("slider", webApp);
 
       webAppService.init(serviceConf);
       webAppService.start();
@@ -655,13 +715,16 @@
       }
 
       // extract container list
-      List<Container> liveContainers =
-          response.getContainersFromPreviousAttempts();
+
+      liveContainers = response.getContainersFromPreviousAttempts();
 
       //now validate the installation
       Configuration providerConf =
         providerService.loadProviderConfigurationInformation(confDir);
 
+      providerService
+          .initializeApplicationConfiguration(instanceDefinition, fs);
+
       providerService.validateApplicationConfiguration(instanceDefinition, 
                                                        confDir,
                                                        securityEnabled);
@@ -671,12 +734,17 @@
 
       //build the instance
       appState.buildInstance(instanceDefinition,
-                             providerConf,
-                             providerRoles,
-                             fs.getFileSystem(),
-                             historyDir,
-                             liveContainers,
-                             appInformation);
+          serviceConf,
+          providerConf,
+          providerRoles,
+          fs.getFileSystem(),
+          historyDir,
+          liveContainers,
+          appInformation,
+          new SimpleReleaseSelector());
+
+      providerService.rebuildContainerDetails(liveContainers,
+          instanceDefinition.getName(), appState.getRolePriorityMap());
 
       // add the AM to the list of nodes in the cluster
       
@@ -687,21 +755,21 @@
 
       // build up environment variables that the AM wants set in every container
       // irrespective of provider and role.
-      envVars = new HashMap<>();
+      envVars = new HashMap<String, String>();
       if (hadoop_user_name != null) {
         envVars.put(HADOOP_USER_NAME, hadoop_user_name);
       }
     }
     String rolesTmpSubdir = appMasterContainerID.toString() + "/roles";
 
-    String amTmpDir = globalInternalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR);
+    String amTmpDir = globalInternalOptions.getMandatoryOption(InternalKeys.INTERNAL_AM_TMP_DIR);
 
     Path tmpDirPath = new Path(amTmpDir);
     Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir);
     fs.getFileSystem().mkdirs(launcherTmpDirPath);
     
     //launcher service
-    launchService = new RoleLaunchService(this,
+    launchService = new RoleLaunchService(actionQueues,
                                           providerService,
                                           fs,
                                           new Path(getGeneratedConfDir()),
@@ -714,21 +782,24 @@
 
 
     //Give the provider restricted access to the state, registry
-    providerService.bind(stateForProviders, registry, this);
-    sliderAMProvider.bind(stateForProviders, registry, null);
+    providerService.bind(stateForProviders, registry, actionQueues,
+        liveContainers);
+    sliderAMProvider.bind(stateForProviders, registry, actionQueues,
+        liveContainers);
 
     // now do the registration
     registerServiceInstance(clustername, appid);
 
+    // chaos monkey
+    maybeStartMonkey();
+
+    // Start the Slider AM provider
     sliderAMProvider.start();
 
-
-    
-    // launch the provider; this is expected to trigger a callback that
+    // launch the real provider; this is expected to trigger a callback that
     // starts the node review process
     launchProviderService(instanceDefinition, confDir);
 
-
     try {
       //now block waiting to be told to exit the process
       waitForAMCompletionSignal();
@@ -742,7 +813,12 @@
 
   private void startAgentWebApp(MapOperations appInformation,
                                 Configuration serviceConf) {
-    LOG_YARN.info("AM classpath:" + ((URLClassLoader) AgentWebApp.class.getClassLoader() ).getURLs());
+    URL[] urls = ((URLClassLoader) AgentWebApp.class.getClassLoader() ).getURLs();
+    StringBuilder sb = new StringBuilder("AM classpath:");
+    for (URL url : urls) {
+      sb.append("\n").append(url.toString());
+    }
+    LOG_YARN.info(sb.append("\n").toString());
     // Start up the agent web app and track the URL for it
     AgentWebApp agentWebApp = AgentWebApp.$for(AgentWebApp.BASE_PATH,
                      new WebAppApiImpl(this,
@@ -826,6 +902,31 @@
   }
 
   /**
+   * Register/re-register a component (that is already in the app state
+   * @param id the component
+   */
+  public boolean registerComponent(ContainerId id) {
+    RoleInstance instance = appState.getOwnedContainer(id);
+    if (instance == null) {
+      return false;
+    }
+    // this is where component registrations will go
+    log.info("Registering component {}", id);
+
+    return true;
+  }
+  
+  /**
+   * unregister a component. At the time this message is received,
+   * the component may already been deleted from/never added to
+   * the app state
+   * @param id the component
+   */
+  public void unregisterComponent(ContainerId id) {
+    log.info("Unregistering component {}", id);
+  }
+  
+  /**
    * looks for a specific case where a token file is provided as an environment
    * variable, yet the file is not there.
    * 
@@ -861,9 +962,18 @@
    * @return the generated configuration dir
    */
   public String getGeneratedConfDir() {
+    return getGlobalInternalOptions().get(
+        InternalKeys.INTERNAL_GENERATED_CONF_PATH);
+  }
+
+  /**
+   * Get the global internal options for the AM
+   * @return a map to access the internals
+   */
+  public MapOperations getGlobalInternalOptions() {
     return getInstanceDefinition()
       .getInternalOperations().
-      getGlobalOptions().get(OptionKeys.INTERNAL_GENERATED_CONF_PATH);
+      getGlobalOptions();
   }
 
   /**
@@ -875,6 +985,22 @@
   }
 
   /**
+   * Get the AM log
+   * @return the log of the AM
+   */
+  public static Logger getLog() {
+    return log;
+  }
+
+  /**
+   * Get the application state
+   * @return the application state
+   */
+  public AppState getAppState() {
+    return appState;
+  }
+
+  /**
    * Block until it is signalled that the AM is done
    */
   private void waitForAMCompletionSignal() {
@@ -937,7 +1063,6 @@
     //stop any launches in progress
     launchService.stop();
 
-
     //now release all containers
     releaseAllContainers();
 
@@ -945,8 +1070,8 @@
     // signal to the RM
     log.info("Application completed. Signalling finish to RM");
 
-
     //if there were failed containers and the app isn't already down as failing, it is now
+/*
     int failedContainerCount = appState.getFailedCountainerCount();
     if (failedContainerCount != 0 &&
         appStatus == FinalApplicationStatus.SUCCEEDED) {
@@ -955,12 +1080,20 @@
         "Completed with exit code =  " + exitCode + " - " + getContainerDiagnosticInfo();
       success = false;
     }
+*/
     try {
       log.info("Unregistering AM status={} message={}", appStatus, appMessage);
       asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
+/* JDK7
     } catch (YarnException | IOException e) {
       log.info("Failed to unregister application: " + e, e);
     }
+*/
+    } catch (IOException e) {
+      log.info("Failed to unregister application: " + e, e);
+    } catch (YarnException e) {
+      log.info("Failed to unregister application: " + e, e);
+    }
   }
 
   /**
@@ -1016,8 +1149,8 @@
   @Override //AMRMClientAsync
   public void onContainersAllocated(List<Container> allocatedContainers) {
     LOG_YARN.info("onContainersAllocated({})", allocatedContainers.size());
-    List<ContainerAssignment> assignments = new ArrayList<>();
-    List<AbstractRMOperation> operations = new ArrayList<>();
+    List<ContainerAssignment> assignments = new ArrayList<ContainerAssignment>();
+    List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
     
     //app state makes all the decisions
     appState.onContainersAllocated(allocatedContainers, assignments, operations);
@@ -1030,7 +1163,7 @@
     }
     
     //for all the operations, exec them
-    rmOperationHandler.execute(operations);
+    executeRMOperations(operations);
     log.info("Diagnostics: " + getContainerDiagnosticInfo());
   }
 
@@ -1050,20 +1183,15 @@
 
       // non complete containers should not be here
       assert (status.getState() == ContainerState.COMPLETE);
-      AppState.NodeCompletionResult result = appState.onCompletedNode(
-          getConfig(), status);
+      AppState.NodeCompletionResult result = appState.onCompletedNode(status);
       if (result.containerFailed) {
         RoleInstance ri = result.roleInstance;
         log.error("Role instance {} failed ", ri);
       }
-    }
 
-    // ask for more containers if any failed
-    // In the case of Slider, we don't expect containers to complete since
-    // Slider is a long running application. Keep track of how many containers
-    // are completing. If too many complete, abort the application
-    // TODO: this needs to be better thought about (and maybe something to
-    // better handle in Yarn for long running apps)
+      getProviderService().notifyContainerCompleted(containerId);
+      queue(new UnregisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS));
+    }
 
     try {
       reviewRequestAndReleaseNodes();
@@ -1076,19 +1204,52 @@
    * Implementation of cluster flexing.
    * It should be the only way that anything -even the AM itself on startup-
    * asks for nodes. 
+   * @param resources the resource tree
    * @return true if the any requests were made
    * @throws IOException
    */
-  private boolean flexCluster(ConfTree updated)
+  private boolean flexCluster(ConfTree resources)
     throws IOException, SliderInternalStateException, BadConfigException {
 
-    appState.updateResourceDefinitions(updated);
+    appState.updateResourceDefinitions(resources);
+
+    // reset the scheduled windows...the values
+    // may have changed
+    appState.resetFailureCounts();
+    
+
 
     // ask for more containers if needed
     return reviewRequestAndReleaseNodes();
   }
 
   /**
+   * Schedule the failure window
+   * @param resources the resource tree
+   * @throws BadConfigException if the window is out of range
+   */
+  private void scheduleFailureWindowResets(ConfTree resources) throws
+      BadConfigException {
+    ResetFailureWindow reset = new ResetFailureWindow();
+    ConfTreeOperations ops = new ConfTreeOperations(resources);
+    MapOperations globals = ops.getGlobalOptions();
+    long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW,
+        ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS,
+        ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS,
+        ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES, 0);
+    if (seconds > 0) {
+      log.info(
+          "Scheduling the failure window reset interval to every {} seconds",
+          seconds);
+      RenewingAction<ResetFailureWindow> renew = new RenewingAction<ResetFailureWindow>(
+          reset, seconds, seconds, TimeUnit.SECONDS, 0);
+      actionQueues.renewing("failures", renew);
+    } else {
+      log.info("Failure window reset interval is not set");
+    }
+  }
+  
+  /**
    * Look at where the current node state is -and whether it should be changed
    */
   private synchronized boolean reviewRequestAndReleaseNodes()
@@ -1100,8 +1261,10 @@
     }
     try {
       List<AbstractRMOperation> allOperations = appState.reviewRequestAndReleaseNodes();
+      // tell the provider
+      providerRMOperationHandler.execute(allOperations);
       //now apply the operations
-      rmOperationHandler.execute(allOperations);
+      executeRMOperations(allOperations);
       return !allOperations.isEmpty();
     } catch (TriggerClusterTeardownException e) {
 
@@ -1117,7 +1280,7 @@
    */
   private void releaseAllContainers() {
     //now apply the operations
-    rmOperationHandler.execute(appState.releaseAllContainers());
+    executeRMOperations(appState.releaseAllContainers());
   }
 
   /**
@@ -1152,7 +1315,8 @@
   public void onError(Throwable e) {
     //callback says it's time to finish
     LOG_YARN.error("AMRMClientAsync.onError() received " + e, e);
-    signalAMComplete(EXIT_EXCEPTION_THROWN, "AMRMClientAsync.onError() received " + e);
+    signalAMComplete(EXIT_EXCEPTION_THROWN,
+        "AMRMClientAsync.onError() received " + e);
   }
   
 /* =================================================================== */
@@ -1187,8 +1351,8 @@
                                                                                                  YarnException {
     SliderUtils.getCurrentUser();
     String message = request.getMessage();
-    log.info("SliderAppMasterApi.stopCluster: {}",message);
-    signalAMComplete(EXIT_CLIENT_INITIATED_SHUTDOWN, message);
+    log.info("SliderAppMasterApi.stopCluster: {}", message);
+    schedule(new ActionStopSlider(message, 1000, TimeUnit.MILLISECONDS));
     return Messages.StopClusterResponseProto.getDefaultInstance();
   }
 
@@ -1200,8 +1364,8 @@
 
     String payload = request.getClusterSpec();
     ConfTreeSerDeser confTreeSerDeser = new ConfTreeSerDeser();
-    ConfTree updated = confTreeSerDeser.fromJson(payload);
-    boolean flexed = flexCluster(updated);
+    ConfTree updatedResources = confTreeSerDeser.fromJson(payload);
+    boolean flexed = flexCluster(updatedResources);
     return Messages.FlexClusterResponseProto.newBuilder().setResponse(flexed).build();
   }
 
@@ -1214,8 +1378,7 @@
     String result;
     //quick update
     //query and json-ify
-    ClusterDescription cd;
-    cd = getCurrentClusterStatus();
+    ClusterDescription cd = updateClusterStatus();
     result = cd.toJsonString();
     String stat = result;
     return Messages.GetJSONClusterStatusResponseProto.newBuilder()
@@ -1223,19 +1386,6 @@
       .build();
   }
 
-  /**
-   * Get the current cluster status, including any provider-specific info
-   * @return a status document
-   */
-  public ClusterDescription getCurrentClusterStatus() {
-    ClusterDescription cd;
-    synchronized (this) {
-      updateClusterStatus();
-      cd = getClusterDescription();
-    }
-    return cd;
-  }
-
 
   @Override
   public Messages.GetInstanceDefinitionResponseProto getInstanceDefinition(
@@ -1334,31 +1484,38 @@
     //throws NoSuchNodeException if it is missing
     RoleInstance instance =
       appState.getLiveInstanceByContainerID(containerID);
-    List<AbstractRMOperation> opsList =
-      new LinkedList<>();
-    ContainerReleaseOperation release =
-      new ContainerReleaseOperation(instance.getId());
-    opsList.add(release);
-    //now apply the operations
-    rmOperationHandler.execute(opsList);
+    queue(new ActionKillContainer(instance.getId(), 0, TimeUnit.MILLISECONDS,
+        rmOperationHandler));
     Messages.KillContainerResponseProto.Builder builder =
       Messages.KillContainerResponseProto.newBuilder();
     builder.setSuccess(true);
     return builder.build();
   }
 
+  public void executeRMOperations(List<AbstractRMOperation> operations) {
+    rmOperationHandler.execute(operations);
+  }
+
+  /**
+   * Get the RM operations handler for direct scheduling of work.
+   */
+  @VisibleForTesting
+  public RMOperationHandler getRmOperationHandler() {
+    return rmOperationHandler;
+  }
+
   @Override
-  public Messages.AMSuicideResponseProto amSuicide(Messages.AMSuicideRequestProto request) throws
-                                                                                           IOException,
-                                                                                           YarnException {
+  public Messages.AMSuicideResponseProto amSuicide(
+      Messages.AMSuicideRequestProto request)
+      throws IOException, YarnException {
     int signal = request.getSignal();
     String text = request.getText();
     int delay = request.getDelay();
     log.info("AM Suicide with signal {}, message {} delay = {}", signal, text, delay);
-    SliderUtils.haltAM(signal, text, delay);
-    Messages.AMSuicideResponseProto.Builder builder =
-      Messages.AMSuicideResponseProto.newBuilder();
-    return builder.build();
+    ActionHalt action = new ActionHalt(signal, text, delay,
+        TimeUnit.MILLISECONDS);
+    schedule(action);
+    return Messages.AMSuicideResponseProto.getDefaultInstance();
   }
 
   /* =================================================================== */
@@ -1368,10 +1525,10 @@
   /**
    * Update the cluster description with anything interesting
    */
-  public synchronized void updateClusterStatus() {
+  public synchronized ClusterDescription updateClusterStatus() {
     Map<String, String> providerStatus = providerService.buildProviderStatus();
     assert providerStatus != null : "null provider status";
-    appState.refreshClusterStatus(providerStatus);
+    return appState.refreshClusterStatus(providerStatus);
   }
 
   /**
@@ -1385,7 +1542,7 @@
   protected synchronized void launchProviderService(AggregateConf instanceDefinition,
                                                     File confDir)
     throws IOException, SliderException {
-    Map<String, String> env = new HashMap<>();
+    Map<String, String> env = new HashMap<String, String>();
     boolean execStarted = providerService.exec(instanceDefinition, confDir, env, this);
     if (execStarted) {
       providerService.registerServiceListener(this);
@@ -1418,28 +1575,26 @@
     }
   }
 
-
-  /* =================================================================== */
-  /* ProviderAMOperations */
-  /* =================================================================== */
-
   /**
-   * Refreshes the container by releasing it and having it reallocated
+   * report container loss. If this isn't already known about, react
    *
-   * @param containerId       id of the container to release
-   * @param newHostIfPossible allocate the replacement container on a new host
-   *
+   * @param containerId       id of the container which has failed
    * @throws SliderException
    */
-  public void refreshContainer(String containerId, boolean newHostIfPossible)
+  public synchronized void providerLostContainer(
+      ContainerId containerId)
       throws SliderException {
-    log.info(
-        "Refreshing container {} per provider request.",
+    log.info("containerLostContactWithProvider: container {} lost",
         containerId);
-    rmOperationHandler.execute(appState.releaseContainer(containerId));
-
-    // ask for more containers if needed
-    reviewRequestAndReleaseNodes();
+    RoleInstance activeContainer = appState.getOwnedContainer(containerId);
+    if (activeContainer != null) {
+      executeRMOperations(appState.releaseContainer(containerId));
+      // ask for more containers if needed
+      log.info("Container released; triggering review");
+      reviewRequestAndReleaseNodes();
+    } else {
+      log.info("Container not in active set - ignoring");
+    }
   }
 
   /* =================================================================== */
@@ -1499,7 +1654,6 @@
    * @param ctx context
    * @param instance node details
    */
-  @Override // ContainerStartOperation
   public void startContainer(Container container,
                              ContainerLaunchContext ctx,
                              RoleInstance instance) {
@@ -1527,10 +1681,14 @@
     LOG_YARN.info("Started Container {} ", containerId);
     RoleInstance cinfo = appState.onNodeManagerContainerStarted(containerId);
     if (cinfo != null) {
-      LOG_YARN.info("Deployed instance of role {}", cinfo.role);
+      LOG_YARN.info("Deployed instance of role {} onto {}",
+          cinfo.role, containerId);
       //trigger an async container status
       nmClientAsync.getContainerStatusAsync(containerId,
                                             cinfo.container.getNodeId());
+      // push out a registration
+      queue(new RegisterComponentInstance(containerId, 0, TimeUnit.MILLISECONDS));
+      
     } else {
       //this is a hypothetical path not seen. We react by warning
       log.error("Notified of started container that isn't pending {} - releasing",
@@ -1550,7 +1708,7 @@
   public void onContainerStatusReceived(ContainerId containerId,
                                         ContainerStatus containerStatus) {
     LOG_YARN.debug("Container Status: id={}, status={}", containerId,
-                   containerStatus);
+        containerStatus);
   }
 
   @Override //  NMClientAsync.CallbackHandler 
@@ -1590,14 +1748,84 @@
   }
 
   /**
-   * Get the username for the slider cluster as set in the environment
-   * @return the username or null if none was set/it is a secure cluster
+   * Queue an action for immediate execution in the executor thread
+   * @param action action to execute
    */
-  public String getHadoop_user_name() {
-    return hadoop_user_name;
+  public void queue(AsyncAction action) {
+    actionQueues.put(action);
   }
 
   /**
+   * Schedule an action
+   * @param action for delayed execution
+   */
+  public void schedule(AsyncAction action) {
+    actionQueues.schedule(action);
+  }
+
+
+  /**
+   * Handle any exception in a thread. If the exception provides an exit
+   * code, that is the one that will be used
+   * @param thread thread throwing the exception
+   * @param exception exception
+   */
+  public void onExceptionInThread(Thread thread, Exception exception) {
+    log.error("Exception in {}: {}", thread.getName(), exception, exception);
+    int exitCode = EXIT_EXCEPTION_THROWN;
+    if (exception instanceof ExitCodeProvider) {
+      exitCode = ((ExitCodeProvider) exception).getExitCode();
+    }
+    signalAMComplete(exitCode, exception.toString());
+  }
+
+  /**
+   * Start the chaos monkey
+   * @return true if it started
+   */
+  private boolean maybeStartMonkey() {
+    MapOperations internals = getGlobalInternalOptions();
+
+    Boolean enabled =
+        internals.getOptionBool(InternalKeys.CHAOS_MONKEY_ENABLED,
+            InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
+    if (!enabled) {
+      log.info("Chaos monkey disabled");
+    }
+    
+    long monkeyInterval = internals.getTimeRange(
+        InternalKeys.CHAOS_MONKEY_INTERVAL,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES,
+        0);
+    log.info("Adding Chaos Monkey scheduled every {} seconds ({} hours)",
+        monkeyInterval, monkeyInterval/(60*60));
+    monkey = new ChaosMonkeyService(metrics, actionQueues);
+    int amKillProbability = internals.getOptionInt(
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE);
+    if (amKillProbability > 0) {
+      monkey.addTarget("AM killer",
+          new ChaosKillAM(actionQueues, -1), amKillProbability
+      );
+    }
+    int containerKillProbability = internals.getOptionInt(
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE);
+    if (containerKillProbability > 0) {
+      monkey.addTarget("Container killer",
+          new ChaosKillContainer(appState, actionQueues, rmOperationHandler),
+          containerKillProbability
+      );
+    }
+    initAndAddService(monkey);
+    // and schedule it
+    schedule(monkey.getChaosAction(monkeyInterval, TimeUnit.SECONDS));
+    return true;
+  }
+  
+  /**
    * This is the main entry point for the service launcher.
    * @param args command line arguments.
    */
@@ -1606,10 +1834,11 @@
     //turn the args to a list
     List<String> argsList = Arrays.asList(args);
     //create a new list, as the ArrayList type doesn't push() on an insert
-    List<String> extendedArgs = new ArrayList<>(argsList);
+    List<String> extendedArgs = new ArrayList<String>(argsList);
     //insert the service name
     extendedArgs.add(0, SERVICE_CLASSNAME);
     //now have the service launcher do its work
     ServiceLauncher.serviceMain(extendedArgs);
   }
+
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java
new file mode 100644
index 0000000..c21e249
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionHalt.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Exit a JVM halt.
+ * @see ExitUtil#halt(int, String) 
+ */
+public class ActionHalt extends AsyncAction {
+
+  private final int status;
+  private final String text;
+
+  public ActionHalt(
+      int status,
+      String text,
+      long delay, TimeUnit timeUnit) {
+    super("Halt", delay, ActionAttributes.HALTS_CLUSTER);
+    this.status = status;
+    this.text = text;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    ExitUtil.halt(status, text);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java
new file mode 100644
index 0000000..c1e7e6e
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+public class ActionKillContainer extends AsyncAction {
+
+  private final ContainerId containerId;
+  private final RMOperationHandler operationHandler;
+  public ActionKillContainer(
+      ContainerId containerId,
+      long delay,
+      TimeUnit timeUnit,
+      RMOperationHandler operationHandler) {
+    super("kill container", delay, timeUnit);
+    this.operationHandler = operationHandler;
+    Preconditions.checkArgument(containerId != null);
+    
+    this.containerId = containerId;
+  }
+
+  /**
+   * Get the container ID to kill
+   * @return
+   */
+  public ContainerId getContainerId() {
+    return containerId;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+      List<AbstractRMOperation> opsList = new LinkedList<AbstractRMOperation>();
+    ContainerReleaseOperation release = new ContainerReleaseOperation(containerId);
+    opsList.add(release);
+    //now apply the operations
+    operationHandler.execute(opsList);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java
new file mode 100644
index 0000000..d95dc74
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStartContainer.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.apache.slider.server.appmaster.state.RoleInstance;
+
+import java.util.Locale;
+
+/**
+ * Start a container
+ * @see SliderAppMaster#startContainer(Container, ContainerLaunchContext, RoleInstance) 
+ */
+public class ActionStartContainer extends AsyncAction {
+
+  private final Container container;
+  private final ContainerLaunchContext ctx;
+  private final RoleInstance instance;
+
+  public ActionStartContainer(String name,
+      long delay,
+      Container container,
+      ContainerLaunchContext ctx,
+      RoleInstance instance) {
+    super(
+        String.format(Locale.ENGLISH,
+            "%s %s: /",
+            name , container.getId().toString()), 
+        delay);
+    this.container = container;
+    this.ctx = ctx;
+    this.instance = instance;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    appMaster.startContainer(container, ctx, instance);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java
new file mode 100644
index 0000000..66a3961
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopQueue.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Action to tell a queue executor to stop once it has handed on/executed this action
+ */
+public class ActionStopQueue extends AsyncAction {
+
+  public ActionStopQueue(long delay) {
+    super("stop queue", delay);
+  }
+
+  public ActionStopQueue(long delay,
+      TimeUnit timeUnit) {
+    super("stop queue", delay, timeUnit);
+  }
+
+  public ActionStopQueue(String name,
+      long delay,
+      TimeUnit timeUnit) {
+    super(name, delay, timeUnit);
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    // no-op
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
new file mode 100644
index 0000000..f084383
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.core.main.LauncherExitCodes;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+public class ActionStopSlider extends AsyncAction {
+  public ActionStopSlider(String message,
+      long delay) {
+    super(message, delay, ActionAttributes.HALTS_CLUSTER);
+  }
+
+  public ActionStopSlider(String name,
+      long delay,
+      TimeUnit timeUnit) {
+    super(name, delay, timeUnit, ActionAttributes.HALTS_CLUSTER);
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    String message = name;
+    SliderAppMaster.getLog().info("SliderAppMasterApi.stopCluster: {}",
+        message);
+    appMaster.signalAMComplete(
+        LauncherExitCodes.EXIT_CLIENT_INITIATED_SHUTDOWN,
+        message);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java
new file mode 100644
index 0000000..996390d
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/AsyncAction.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.common.tools.SliderUtils;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.concurrent.Delayed;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+public abstract class AsyncAction implements Delayed {
+
+  private static final AtomicLong sequencer = new AtomicLong(0);
+
+  public final String name;
+  private long nanos;
+  private final EnumSet<ActionAttributes> attrs;
+  private final long sequenceNumber = sequencer.incrementAndGet();
+
+
+  protected AsyncAction(String name) {
+    this(name, 0);
+  }
+
+  protected AsyncAction(String name,
+      long delayMillis) {
+    this(name, delayMillis, TimeUnit.MILLISECONDS);
+  }
+
+  protected AsyncAction(String name,
+      long delay,
+      TimeUnit timeUnit) {
+    this.name = name;
+    this.setNanos(convertAndOffset(delay, timeUnit));
+    attrs = EnumSet.noneOf(ActionAttributes.class);
+  }
+
+  protected AsyncAction(String name,
+      long delay,
+      TimeUnit timeUnit,
+      EnumSet<ActionAttributes> attrs) {
+    this.name = name;
+    this.setNanos(convertAndOffset(delay, timeUnit));
+    this.attrs = attrs;
+  }
+
+  protected AsyncAction(String name,
+      long delay,
+      TimeUnit timeUnit,
+      ActionAttributes... attributes) {
+    this(name, delay, timeUnit);
+    Collections.addAll(attrs, attributes);
+  }
+  
+  protected AsyncAction(String name,
+      long delayMillis,
+      ActionAttributes... attributes) {
+    this(name, delayMillis, TimeUnit.MILLISECONDS);
+  }
+
+  protected long convertAndOffset(long delay, TimeUnit timeUnit) {
+    return now() + TimeUnit.NANOSECONDS.convert(delay, timeUnit);
+  }
+
+  /**
+   * The current time in nanos
+   * @return now
+   */
+  protected long now() {
+    return System.nanoTime();
+  }
+
+  @Override
+  public long getDelay(TimeUnit unit) {
+    return unit.convert(getNanos() - now(), TimeUnit.NANOSECONDS);
+  }
+
+  @Override
+  public int compareTo(Delayed that) {
+    if (this == that) {
+      return 0;
+    }
+    return SliderUtils.compareTo(
+        getDelay(TimeUnit.NANOSECONDS),
+        that.getDelay(TimeUnit.NANOSECONDS));
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb =
+        new StringBuilder(super.toString());
+    sb.append(" name='").append(name).append('\'');
+    sb.append(", nanos=").append(getNanos());
+    sb.append(", attrs=").append(attrs);
+    sb.append(", sequenceNumber=").append(sequenceNumber);
+    sb.append('}');
+    return sb.toString();
+  }
+
+  protected EnumSet<ActionAttributes> getAttrs() {
+    return attrs;
+  }
+
+  /**
+   * Ask if an action has a specific attribute
+   * @param attr attribute
+   * @return true iff the action has the specific attribute
+   */
+  public boolean hasAttr(ActionAttributes attr) {
+    return attrs.contains(attr);
+  }
+
+  /**
+   * Actual application of the action by the queue executor.
+   * @param appMaster the app master to operate on
+   * @param queueService queue access for scheduling any follow-up actions
+   * @param appState the application state
+   * @throws Exception on any failure
+   */
+  public abstract void execute(SliderAppMaster appMaster,
+      QueueAccess queueService, AppState appState) throws Exception;
+
+  public long getNanos() {
+    return nanos;
+  }
+
+  public void setNanos(long nanos) {
+    this.nanos = nanos;
+  }
+
+  public enum ActionAttributes {
+    SHRINKS_CLUSTER,
+    EXPANDS_CLUSTER,
+    HALTS_CLUSTER,
+  }
+
+
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java
new file mode 100644
index 0000000..2aa67bb
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderReportedContainerLoss.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+/**
+ * Report container loss to the AM
+ * {@link SliderAppMaster#providerLostContainer(ContainerId)}
+ */
+public class ProviderReportedContainerLoss extends AsyncAction {
+
+  private final ContainerId containerId;
+  
+  public ProviderReportedContainerLoss(ContainerId containerId) {
+    super("lost container " + containerId);
+    this.containerId = containerId;
+  }
+
+  public ProviderReportedContainerLoss(
+      ContainerId containerId, long delayMillis) {
+    super("lost container " + containerId, delayMillis);
+    this.containerId = containerId;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    appMaster.providerLostContainer(containerId);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java
new file mode 100644
index 0000000..4577025
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ProviderStartupCompleted.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+public class ProviderStartupCompleted extends AsyncAction {
+
+  public ProviderStartupCompleted() {
+    super("ProviderStartupCompleted");
+  }
+
+  public ProviderStartupCompleted(long delayMillis) {
+    super("ProviderStartupCompleted", delayMillis);
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    appMaster.eventCallbackEvent(null);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java
new file mode 100644
index 0000000..cffaf5e
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueAccess.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+/**
+ * Access for queue operations
+ */
+public interface QueueAccess {
+  /**
+   * Put an action on the immediate queue -to be executed when the queue
+   * reaches it.
+   * @param action action to queue
+   */
+  void put(AsyncAction action);
+
+  /**
+   * Put a delayed action: this will only be added to the main queue
+   * after its action time has been reached
+   * @param action action to queue
+   */
+  void schedule(AsyncAction action);
+
+  /**
+   * Remove an action from the queues.
+   * @param action action to remove
+   * @return true if the action was removed
+   */
+  boolean remove(AsyncAction action);
+
+  /**
+   * Add a named renewing action
+   * @param name name
+   * @param renewingAction wrapped action
+   */
+  void renewing(String name,
+      RenewingAction<? extends AsyncAction> renewingAction);
+
+  /**
+   * Look up a renewing action
+   * @param name name of the action
+   * @return the action or null if none was found
+   */
+  RenewingAction<? extends AsyncAction> lookupRenewingAction(String name);
+
+  /**
+   * Remove a renewing action
+   * @param name name of the action
+   * @return true if the action was found and removed.
+   */
+  boolean removeRenewingAction(String name);
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java
new file mode 100644
index 0000000..87956db
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueExecutor.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Executor for async actions - hands them off to the AM as 
+ * appropriate
+ */
+public class QueueExecutor implements Runnable {
+  private static final Logger log =
+      LoggerFactory.getLogger(QueueExecutor.class);
+
+  private final SliderAppMaster appMaster;
+  private final QueueService actionQueues;
+  private final AppState appState;
+
+  public QueueExecutor(SliderAppMaster appMaster,
+      QueueService actionQueues) {
+    Preconditions.checkNotNull(appMaster);
+    Preconditions.checkNotNull(actionQueues);
+
+    this.appMaster = appMaster;
+    this.actionQueues = actionQueues;
+    this.appState = appMaster.getAppState();
+  }
+
+  @VisibleForTesting
+  QueueExecutor(QueueService actionQueues) {
+    Preconditions.checkNotNull(actionQueues);
+    this.appMaster = null;
+    this.appState = null;
+    this.actionQueues = actionQueues;
+  }
+
+  /**
+   * Run until the queue has been told to stop
+   */
+  @Override
+  public void run() {
+    AsyncAction take = null;
+    try {
+      log.info("Queue Executor run() started");
+      do {
+        take = actionQueues.actionQueue.take();
+        log.debug("Executing {}", take);
+        
+        take.execute(appMaster, actionQueues, appState);
+      } while (!(take instanceof ActionStopQueue));
+      log.info("Queue Executor run() stopped");
+    } catch (Exception e) {
+      log.error("Exception processing {}: {}", take, e, e);
+      if (appMaster != null) {
+        appMaster.onExceptionInThread(Thread.currentThread(), e);
+      }
+    }
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java
new file mode 100644
index 0000000..6ad579d
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.slider.server.services.workflow.ServiceThreadFactory;
+import org.apache.slider.server.services.workflow.WorkflowExecutorService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.BlockingDeque;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.DelayQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * The Queue service provides immediate and scheduled queues, as well
+ * as an executor thread that moves queued actions from the scheduled
+ * queue to the immediate one.
+ * 
+ * <p>
+ * This code is to be revisited to see if all that is needed is a single scheduled
+ * queue, implicitly making actions immediate by giving them an execution
+ * time of "now". It would require attaching a sequence number to all actions, one 
+ * which the queue would have to set from its (monotonic, thread-safe) counter
+ * on every submission, with a modified comparison operator. This would guarantee
+ * that earlier submissions were picked before later ones.
+ */
+public class QueueService extends WorkflowExecutorService<ExecutorService>
+implements Runnable, QueueAccess {
+  private static final Logger log =
+      LoggerFactory.getLogger(QueueService.class);
+  public static final String NAME = "Action Queue";
+
+  /**
+   * Immediate actions.
+   */
+  public final BlockingDeque<AsyncAction> actionQueue =
+      new LinkedBlockingDeque<AsyncAction>();
+
+  /**
+   * Actions to be scheduled in the future
+   */
+  public final DelayQueue<AsyncAction> scheduledActions = new DelayQueue<AsyncAction>();
+
+  /**
+   * Map of renewing actions by name ... this is to allow them to 
+   * be cancelled by name
+   */
+  private final Map<String, RenewingAction<? extends AsyncAction>> renewingActions
+      = new ConcurrentHashMap<String, RenewingAction<? extends AsyncAction>>();
+  
+  /**
+   * Create a queue instance with a single thread executor
+   */
+  public QueueService() {
+    super(NAME,
+        ServiceThreadFactory.singleThreadExecutor(NAME, true));
+  }
+
+  @Override
+  public void put(AsyncAction action) {
+    log.debug("Queueing {}", action);
+    actionQueue.add(action);
+  }
+
+  @Override
+  public void schedule(AsyncAction action) {
+    log.debug("Scheduling {}", action);
+    scheduledActions.add(action);
+  }
+
+  @Override
+  public boolean remove(AsyncAction action) {
+    boolean removedFromDelayQueue = scheduledActions.remove(action);
+    boolean removedFromActions = actionQueue.remove(action);
+    return removedFromActions || removedFromDelayQueue;
+  }
+  
+  @Override
+  public void renewing(String name,
+      RenewingAction<? extends AsyncAction> renewingAction) {
+    log.debug("Adding renewing Action \"{}\": {}", name,
+        renewingAction.getAction());
+    if (removeRenewingAction(name)) {
+      log.debug("Removed predecessor action");
+    }
+    renewingActions.put(name, renewingAction);
+    schedule(renewingAction);
+  } 
+
+  @Override
+  public RenewingAction<? extends AsyncAction> lookupRenewingAction(String name) {
+    return renewingActions.get(name);
+  }
+
+  @Override
+  public boolean removeRenewingAction(String name) {
+    RenewingAction<? extends AsyncAction> action = renewingActions.remove(name);
+    return action != null && remove(action);
+  }
+  
+  /**
+   * Stop the service by scheduling an {@link ActionStopQueue} action
+   * ..if the processor thread is working this will propagate through
+   * and stop the queue handling after all other actions complete.
+   * @throws Exception
+   */
+  @Override
+  protected void serviceStop() throws Exception {
+    ActionStopQueue stopQueue = new ActionStopQueue("serviceStop: "+ this,
+        0, TimeUnit.MILLISECONDS);
+    schedule(stopQueue);
+    super.serviceStop();
+  }
+
+  /**
+   * Flush an action queue of all types of a specific action
+   * @param clazz 
+   */
+  protected void flushActionQueue(Class<? extends AsyncAction> clazz) {
+    Iterator<AsyncAction> iterator =
+        actionQueue.descendingIterator();
+    while (iterator.hasNext()) {
+      AsyncAction next = iterator.next();
+      if (next.getClass().equals(clazz)) {
+        iterator.remove();
+      }
+    }
+  }
+  
+  /**
+   * Run until the queue has been told to stop
+   */
+  @Override
+  public void run() {
+    try {
+
+      log.info("QueueService processor started");
+
+      AsyncAction take;
+      do {
+        take = scheduledActions.take();
+        log.debug("Propagating {}", take);
+        actionQueue.put(take);
+      } while (!(take instanceof ActionStopQueue));
+      log.info("QueueService processor terminated");
+    } catch (InterruptedException e) {
+      //game over
+    }
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java
new file mode 100644
index 0000000..a8a6fe2
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RegisterComponentInstance.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+public class RegisterComponentInstance extends AsyncAction {
+  
+
+  public final ContainerId containerId;
+
+  public RegisterComponentInstance(ContainerId containerId, long delay,
+      TimeUnit timeUnit) {
+    super("RegisterComponentInstance :" + containerId,
+        delay, timeUnit);
+    Preconditions.checkArgument(containerId != null);
+    this.containerId = containerId;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+
+    appMaster.registerComponent(containerId);
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java
new file mode 100644
index 0000000..c62582f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/RenewingAction.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * This action executes then reschedules an inner action; a limit
+ * can specify the number of times to run
+ */
+
+public class RenewingAction<A extends AsyncAction> extends AsyncAction {
+  private static final Logger log =
+      LoggerFactory.getLogger(RenewingAction.class);
+  private final A action;
+  private final long interval;
+  private final TimeUnit timeUnit;
+  public final AtomicInteger executionCount = new AtomicInteger();
+  public final int limit;
+
+
+  /**
+   * Rescheduling action
+   * @param action action to execute
+   * @param initialDelay initial delay
+   * @param interval interval for later delays
+   * @param timeUnit time unit for all times
+   * @param limit limit on the no. of executions. If 0 or less: no limit
+   */
+  public RenewingAction(A action,
+      long initialDelay,
+      long interval,
+      TimeUnit timeUnit,
+      int limit) {
+    super("renewing " + action.name, initialDelay, timeUnit, action.getAttrs());
+    // note: action.name has already been dereferenced in the super() call above,
+    // so a null action would have failed there with an NPE before this check
+    Preconditions.checkArgument(action != null, "null actions");
+    this.action = action;
+    this.interval = interval;
+    this.timeUnit = timeUnit;
+    this.limit = limit;
+  }
+
+  /**
+   * Execute the inner action then reschedule ourselves
+   * @param appMaster the app master
+   * @param queueService queue on which this action reschedules itself
+   * @param appState the application state
+   * @throws Exception if the inner action fails
+   */
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState)
+      throws Exception {
+    long exCount = executionCount.incrementAndGet();
+    log.debug("{}: Executing inner action count # {}", this, exCount);
+    action.execute(appMaster, queueService, appState);
+    boolean reschedule = true;
+    if (limit > 0) {
+      reschedule = limit > exCount;
+    }
+    if (reschedule) {
+      this.setNanos(convertAndOffset(interval, timeUnit));
+      log.debug("{}: rescheduling, new offset {} mS ", this,
+          getDelay(TimeUnit.MILLISECONDS));
+      queueService.schedule(this);
+    }
+  }
+
+  /**
+   * Get the action
+   * @return the wrapped inner action
+   */
+  public A getAction() {
+    return action;
+  }
+
+  public long getInterval() {
+    return interval;
+  }
+
+  public TimeUnit getTimeUnit() {
+    return timeUnit;
+  }
+
+  public int getExecutionCount() {
+    return executionCount.get();
+  }
+
+  public int getLimit() {
+    return limit;
+  }
+}
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
similarity index 61%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
index f683ded..28bcf55 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/TestStub.groovy
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java
@@ -16,17 +16,24 @@
  * limitations under the License.
  */
 
-package org.apache.slider.providers.hbase
+package org.apache.slider.server.appmaster.actions;
 
-import org.junit.Test
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
 
 /**
- *  this is here to ensure there is always a test
+ * Requests the AM to reset the failure window
  */
-class TestStub {
+public class ResetFailureWindow extends AsyncAction {
 
-  @Test
-  public void testStubTest() throws Throwable {
+  public ResetFailureWindow() {
+    super("ResetFailureWindow");
+  }
 
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    appState.resetFailureCounts();
   }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java
new file mode 100644
index 0000000..78d9c1c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/UnregisterComponentInstance.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+public class UnregisterComponentInstance extends AsyncAction {
+  
+
+  public final ContainerId containerId;
+
+  public UnregisterComponentInstance(ContainerId containerId, long delay,
+      TimeUnit timeUnit) {
+    super("UnregisterComponentInstance :" + containerId.toString(),
+        delay, timeUnit);
+    this.containerId = containerId;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    appMaster.unregisterComponent(containerId);
+
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java
new file mode 100644
index 0000000..5905d2f
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.base.Preconditions;
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Entry in the chaos list
+ */
+public class ChaosEntry {
+
+  protected static final Logger log =
+      LoggerFactory.getLogger(ChaosEntry.class);
+  public final String name;
+  public final ChaosTarget target;
+  public final long probability;
+
+  private final MetricRegistry metrics;
+  private final Counter invocationCounter;
+
+
+  /**
+   * Constructor -includes validation of all arguments
+   * @param name
+   * @param target
+   * @param probability
+   */
+  public ChaosEntry(String name, ChaosTarget target, long probability,
+      MetricRegistry metrics) {
+    Preconditions.checkArgument(!StringUtils.isEmpty(name), "missing name");
+    Preconditions.checkArgument(target != null, "null target");
+    Preconditions.checkArgument(probability > 0, "negative probability");
+    Preconditions.checkArgument(probability <= ChaosMonkeyService.PERCENT_100,
+        "probability over 100%");
+    this.name = name;
+    this.target = target;
+    this.probability = probability;
+    this.metrics = metrics;
+    invocationCounter =
+        metrics.counter(MetricRegistry.name(ChaosEntry.class, name));
+  }
+
+  /**
+   * Trigger the chaos action
+   */
+  public void invokeChaos() {
+    log.info("Invoking {}", name);
+    invocationCounter.inc();
+    target.chaosAction();
+  }
+
+  /**
+   * Invoke Chaos if the trigger value is in range of the probability
+   * @param value trigger value, 0-10K
+   * @return true if the chaos method was invoked
+   */
+  public boolean maybeInvokeChaos(long value) {
+    log.debug("Probability {} trigger={}", probability, value);
+    if (value < probability) {
+      invokeChaos();
+      return true;
+    }
+    return false;
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java
new file mode 100644
index 0000000..3c1a914
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import org.apache.slider.server.appmaster.actions.ActionHalt;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Kill the AM
+ */
+public class ChaosKillAM implements ChaosTarget {
+
+  public static final int DELAY = 1000;
+  private final QueueAccess queues;
+  private final int exitCode;
+
+  public ChaosKillAM(QueueAccess queues, int exitCode) {
+    this.queues = queues;
+    this.exitCode = exitCode;
+  }
+
+  /**
+   * Trigger a delayed halt
+   */
+  @Override
+  public void chaosAction() {
+    queues.schedule(new ActionHalt(exitCode, "Chaos invoked halt", DELAY,
+        TimeUnit.MILLISECONDS));
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
new file mode 100644
index 0000000..daf2590
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.google.common.base.Preconditions;
+import org.apache.slider.server.appmaster.actions.ActionKillContainer;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.operations.RMOperationHandler;
+import org.apache.slider.server.appmaster.state.AppState;
+import org.apache.slider.server.appmaster.state.RoleInstance;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Kill a container
+ */
+public class ChaosKillContainer implements ChaosTarget {
+  protected static final Logger log =
+      LoggerFactory.getLogger(ChaosKillContainer.class);
+  public static final int DELAY = 100;
+  private final AppState appState;
+  private final QueueAccess queues;
+  private final Random random = new Random();
+  private final RMOperationHandler operationHandler;
+
+  public ChaosKillContainer(AppState appState,
+      QueueAccess queues,
+      RMOperationHandler operationHandler) {
+    Preconditions.checkNotNull(appState);
+    Preconditions.checkNotNull(queues);
+    this.appState = appState;
+    this.queues = queues;
+    this.operationHandler = operationHandler;
+  }
+
+  /**
+   * Trigger a container kill 
+   */
+  @Override
+  public void chaosAction() {
+    List<RoleInstance> liveContainers =
+        appState.cloneLiveContainerInfoList();
+    int size = liveContainers.size();
+    if (size == 0) {
+      log.info("No containers to kill");
+      return;
+    }
+    int target = random.nextInt(size);
+    RoleInstance roleInstance = liveContainers.get(target);
+    log.info("Killing {}", roleInstance);
+
+    queues.schedule(new ActionKillContainer(roleInstance.getId(),
+        DELAY, TimeUnit.MILLISECONDS, operationHandler));
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java
new file mode 100644
index 0000000..592889c
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import com.codahale.metrics.MetricRegistry;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.actions.RenewingAction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A chaos monkey service which will invoke ChaosTarget events 
+ */
+public class ChaosMonkeyService extends AbstractService {
+  protected static final Logger log =
+      LoggerFactory.getLogger(ChaosMonkeyService.class);
+  public static final int PERCENT_1 = 100;
+  public static final double PERCENT_1D = 100.0;
+  
+  /**
+   * the percentage value as multiplied up
+   */
+  public static final int PERCENT_100 = 100 * PERCENT_1;
+  private final MetricRegistry metrics;
+  private final QueueAccess queues;
+  private final Random random = new Random();
+
+  private static final List<ChaosEntry> chaosEntries =
+      new ArrayList<ChaosEntry>();
+
+  public ChaosMonkeyService(MetricRegistry metrics, QueueAccess queues) {
+    super("ChaosMonkeyService");
+    this.metrics = metrics;
+    this.queues = queues;
+  }
+
+
+  public synchronized void addTarget(String name,
+      ChaosTarget target, long probability) {
+    log.info("Adding {} with probability {}", name, probability / PERCENT_1);
+    chaosEntries.add(new ChaosEntry(name, target, probability, metrics));
+  }
+
+  /**
+   * Iterate through all the entries and invoke chaos on those wanted
+   */
+  public void play() {
+    for (ChaosEntry chaosEntry : chaosEntries) {
+      long p = random.nextInt(PERCENT_100);
+      chaosEntry.maybeInvokeChaos(p);
+    }
+  }
+
+  public RenewingAction<MonkeyPlayAction> getChaosAction(long time, TimeUnit timeUnit) {
+    RenewingAction<MonkeyPlayAction> action = new RenewingAction<MonkeyPlayAction>(
+        new MonkeyPlayAction(this, 0, TimeUnit.MILLISECONDS),
+        time,
+        time,
+        timeUnit,
+        0
+    );
+    return action;
+  }
+}
diff --git a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
similarity index 87%
copy from slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
index eefccbb..1c3a9ac 100644
--- a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosTarget.java
@@ -16,7 +16,9 @@
  * limitations under the License.
  */
 
-package org.apache.slider.funtest.accumulo;
+package org.apache.slider.server.appmaster.monkey;
 
-class StubToForceGroovySrcToCompile {
+public interface ChaosTarget {
+
+  public void chaosAction();
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java
new file mode 100644
index 0000000..20e4466
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/MonkeyPlayAction.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.monkey;
+
+import org.apache.slider.server.appmaster.SliderAppMaster;
+import org.apache.slider.server.appmaster.actions.AsyncAction;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
+import org.apache.slider.server.appmaster.state.AppState;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Queueable action which calls {@link ChaosMonkeyService#play()} when
+ * executed.
+ */
+public class MonkeyPlayAction extends AsyncAction {
+
+  private final ChaosMonkeyService monkey;
+
+  public MonkeyPlayAction(ChaosMonkeyService monkey, long delay,
+      TimeUnit timeUnit) {
+    super("chaos monkey", delay, timeUnit);
+    this.monkey = monkey;
+  }
+
+  @Override
+  public void execute(SliderAppMaster appMaster,
+      QueueAccess queueService,
+      AppState appState) throws Exception {
+    monkey.play();
+  }
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
similarity index 84%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
index e3e595f..2c55215 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AbstractRMOperation.java
@@ -16,16 +16,14 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
 
-public class AbstractRMOperation {
+public abstract class AbstractRMOperation {
 
   /**
    * Execute the operation
    * @param asyncRMClient client
    */
-  public void execute(RMOperationHandler handler) {
-
-  }
+  public abstract void execute(RMOperationHandler handler);
   
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
similarity index 93%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
index 171c021..f7a95a7 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
@@ -16,12 +16,11 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.appmaster.operations;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.client.api.AMRMClient;
 import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -45,6 +44,7 @@
   }
 
   @Override
+  @SuppressWarnings("unchecked")
   public void addContainerRequest(AMRMClient.ContainerRequest req) {
     client.addContainerRequest(req);
   }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
similarity index 95%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
index 8e73f19..3d2016b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerReleaseOperation.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
similarity index 95%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
index 25c3d60..711bb98 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerRequestOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ContainerRequestOperation.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
 
 import org.apache.hadoop.yarn.client.api.AMRMClient;
 
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
similarity index 60%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
index 171c021..a24d9e5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AsyncRMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
@@ -16,36 +16,28 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.appmaster.operations;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.client.api.AMRMClient;
-import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.slider.server.appmaster.state.RMOperationHandler;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.slider.providers.ProviderService;
 
-/**
- * Hands off RM operations to the Resource Manager
- */
-public class AsyncRMOperationHandler extends RMOperationHandler {
-  protected static final Logger log =
-    LoggerFactory.getLogger(AsyncRMOperationHandler.class);
-  private final AMRMClientAsync client;
+public class ProviderNotifyingOperationHandler extends RMOperationHandler {
+  
+  final ProviderService providerService;
 
-  public AsyncRMOperationHandler(AMRMClientAsync client) {
-    this.client = client;
+  public ProviderNotifyingOperationHandler(ProviderService providerService) {
+    this.providerService = providerService;
   }
 
   @Override
   public void releaseAssignedContainer(ContainerId containerId) {
-    log.debug("Releasing container {}", containerId);
-
-    client.releaseAssignedContainer(containerId);
+    providerService.releaseAssignedContainer(containerId);
   }
 
   @Override
   public void addContainerRequest(AMRMClient.ContainerRequest req) {
-    client.addContainerRequest(req);
+    providerService.addContainerRequest(req);
+
   }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
similarity index 75%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java
rename to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
index 4106b16..2b6e9e2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandler.java
@@ -16,19 +16,11 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
-
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
+package org.apache.slider.server.appmaster.operations;
 
 import java.util.List;
 
-public abstract class RMOperationHandler {
-
-
-  public abstract void releaseAssignedContainer(ContainerId containerId);
-
-  public abstract void addContainerRequest(AMRMClient.ContainerRequest req);
+public abstract class RMOperationHandler implements RMOperationHandlerActions {
 
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
similarity index 70%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
index e3e595f..6659cc9 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
@@ -16,16 +16,13 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster.state;
+package org.apache.slider.server.appmaster.operations;
 
-public class AbstractRMOperation {
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
 
-  /**
-   * Execute the operation
-   * @param asyncRMClient client
-   */
-  public void execute(RMOperationHandler handler) {
+public interface RMOperationHandlerActions {
+  void releaseAssignedContainer(ContainerId containerId);
 
-  }
-  
+  void addContainerRequest(AMRMClient.ContainerRequest req);
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index cc238ff..07976ef 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -19,6 +19,7 @@
 package org.apache.slider.server.appmaster.state;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -35,7 +36,6 @@
 import org.apache.slider.api.ClusterDescriptionKeys;
 import org.apache.slider.api.ClusterDescriptionOperations;
 import org.apache.slider.api.ClusterNode;
-import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.RoleKeys;
 import org.apache.slider.api.StatusKeys;
@@ -54,8 +54,10 @@
 import org.apache.slider.core.exceptions.NoSuchNodeException;
 import org.apache.slider.core.exceptions.SliderInternalStateException;
 import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
-import org.apache.slider.core.registry.docstore.PublishedConfigSet;
 import org.apache.slider.providers.ProviderRole;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation;
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,6 +67,7 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
@@ -98,7 +101,7 @@
    * Flag set to indicate the application is live -this only happens
    * after the buildInstance operation
    */
-  boolean applicationLive = false;
+  private boolean applicationLive = false;
 
   /**
    * The definition of the instance. Flexing updates the resources section
@@ -132,7 +135,7 @@
    * Client properties created via the provider -static for the life
    * of the application
    */
-  private Map<String, String> clientProperties = new HashMap<>();
+  private Map<String, String> clientProperties = new HashMap<String, String>();
 
   /**
    The cluster description published to callers
@@ -143,10 +146,13 @@
   private ClusterDescription clusterSpec = new ClusterDescription();
 
   private final Map<Integer, RoleStatus> roleStatusMap =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<Integer, RoleStatus>();
 
   private final Map<String, ProviderRole> roles =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<String, ProviderRole>();
+
+  private final Map<Integer, ProviderRole> rolePriorityMap = 
+    new ConcurrentHashMap<Integer, ProviderRole>();
 
   /**
    * The master node.
@@ -157,8 +163,8 @@
    * Hash map of the containers we have. This includes things that have
    * been allocated but are not live; it is a superset of the live list
    */
-  private final ConcurrentMap<ContainerId, RoleInstance> activeContainers =
-    new ConcurrentHashMap<>();
+  private final ConcurrentMap<ContainerId, RoleInstance> ownedContainers =
+    new ConcurrentHashMap<ContainerId, RoleInstance>();
 
   /**
    * Hash map of the containers we have released, but we
@@ -166,7 +172,7 @@
    * containers is treated as a successful outcome
    */
   private final ConcurrentMap<ContainerId, Container> containersBeingReleased =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<ContainerId, Container>();
   
   /**
    * Counter for completed containers ( complete denotes successful or failed )
@@ -201,34 +207,34 @@
    * the node is promoted from here to the containerMap
    */
   private final Map<ContainerId, RoleInstance> startingNodes =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<ContainerId, RoleInstance>();
 
   /**
    * List of completed nodes. This isn't kept in the CD as it gets too
    * big for the RPC responses. Indeed, we should think about how deep to get this
    */
   private final Map<ContainerId, RoleInstance> completedNodes
-    = new ConcurrentHashMap<>();
+    = new ConcurrentHashMap<ContainerId, RoleInstance>();
 
   /**
    * Nodes that failed to start.
    * Again, kept out of the CD
    */
   private final Map<ContainerId, RoleInstance> failedNodes =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<ContainerId, RoleInstance>();
 
   /**
    * Nodes that came assigned to a role above that
    * which were asked for -this appears to happen
    */
-  private final Set<ContainerId> surplusNodes = new HashSet<>();
+  private final Set<ContainerId> surplusNodes = new HashSet<ContainerId>();
 
   /**
    * Map of containerID -> cluster nodes, for status reports.
    * Access to this should be synchronized on the clusterDescription
    */
   private final Map<ContainerId, RoleInstance> liveNodes =
-    new ConcurrentHashMap<>();
+    new ConcurrentHashMap<ContainerId, RoleInstance>();
   private final AtomicInteger completionOfNodeNotInLiveListEvent =
     new AtomicInteger();
   private final AtomicInteger completionOfUnknownContainerEvent =
@@ -250,6 +256,10 @@
   private long startTimeThreshold;
   
   private int failureThreshold = 10;
+  
+  private String logServerURL = "";
+  
+  private ContainerReleaseSelector containerReleaseSelector;
 
   public AppState(AbstractRecordFactory recordFactory) {
     this.recordFactory = recordFactory;
@@ -313,6 +323,10 @@
     return roles;
   }
 
+  public Map<Integer, ProviderRole> getRolePriorityMap() {
+    return rolePriorityMap;
+  }
+
   private Map<ContainerId, RoleInstance> getStartingNodes() {
     return startingNodes;
   }
@@ -432,28 +446,34 @@
   /**
    * Build up the application state
    * @param instanceDefinition definition of the applicatin instance
+   * @param appmasterConfig AM configuration; used to pick up the YARN log server URL
    * @param publishedProviderConf any configuration info to be published by a provider
    * @param providerRoles roles offered by a provider
    * @param fs filesystem
    * @param historyDir directory containing history files
    * @param liveContainers list of live containers supplied on an AM restart
    * @param applicationInfo
+   * @param releaseSelector selector used to choose which containers to release
    */
   public synchronized void buildInstance(AggregateConf instanceDefinition,
-                            Configuration publishedProviderConf,
-                            List<ProviderRole> providerRoles,
-                            FileSystem fs,
-                            Path historyDir,
-                            List<Container> liveContainers,
-                            Map<String, String> applicationInfo) throws
-                                                                 BadClusterStateException,
-                                                                 BadConfigException,
-                                                                 IOException {
-    this.publishedProviderConf = publishedProviderConf;
-    this.applicationInfo = applicationInfo != null ? applicationInfo 
-                                         : new HashMap<String, String>();
+      Configuration appmasterConfig,
+      Configuration publishedProviderConf,
+      List<ProviderRole> providerRoles,
+      FileSystem fs,
+      Path historyDir,
+      List<Container> liveContainers,
+      Map<String, String> applicationInfo,
+      SimpleReleaseSelector releaseSelector)
+      throws  BadClusterStateException, BadConfigException, IOException {
+    Preconditions.checkArgument(instanceDefinition != null);
+    Preconditions.checkArgument(releaseSelector != null);
 
-    clientProperties = new HashMap<>();
+    this.publishedProviderConf = publishedProviderConf;
+    this.applicationInfo = applicationInfo != null ? applicationInfo
+                                                   : new HashMap<String, String>();
+
+    clientProperties = new HashMap<String, String>();
+    containerReleaseSelector = releaseSelector;
 
 
     Set<String> confKeys = ConfigHelper.sortedConfigKeys(publishedProviderConf);
@@ -463,8 +483,8 @@
       String val = publishedProviderConf.get(key);
       clientProperties.put(key, val);
     }
-    
-    
+
+
     // set the cluster specification (once its dependency the client properties
     // is out the way
 
@@ -477,15 +497,16 @@
     }
 
     ConfTreeOperations resources =
-      instanceDefinition.getResourceOperations();
-    
+        instanceDefinition.getResourceOperations();
+
     Set<String> roleNames = resources.getComponentNames();
     for (String name : roleNames) {
       if (!roles.containsKey(name)) {
         // this is a new value
         log.info("Adding new role {}", name);
         MapOperations resComponent = resources.getComponent(name);
-        ProviderRole dynamicRole = createDynamicProviderRole(name, resComponent);
+        ProviderRole dynamicRole =
+            createDynamicProviderRole(name, resComponent);
         buildRole(dynamicRole);
         providerRoles.add(dynamicRole);
       }
@@ -495,25 +516,29 @@
 
 
     //set the livespan
-    MapOperations globalInternalOpts =
-      instanceDefinition.getInternalOperations().getGlobalOptions();
-    startTimeThreshold = globalInternalOpts.getOptionInt(
-      OptionKeys.INTERNAL_CONTAINER_FAILURE_SHORTLIFE,
-      OptionKeys.DEFAULT_CONTAINER_FAILURE_SHORTLIFE);
+    MapOperations globalResOpts =
+        instanceDefinition.getResourceOperations().getGlobalOptions();
     
-    failureThreshold = globalInternalOpts.getOptionInt(
-      OptionKeys.INTERNAL_CONTAINER_FAILURE_THRESHOLD,
-      OptionKeys.DEFAULT_CONTAINER_FAILURE_THRESHOLD);
+    startTimeThreshold = globalResOpts.getOptionInt(
+        ResourceKeys.CONTAINER_FAILURE_SHORTLIFE,
+        ResourceKeys.DEFAULT_CONTAINER_FAILURE_SHORTLIFE);
+
+    failureThreshold = globalResOpts.getOptionInt(
+        ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+        ResourceKeys.DEFAULT_CONTAINER_FAILURE_THRESHOLD);
     initClusterStatus();
 
 
     // add the roles
     roleHistory = new RoleHistory(providerRoles);
     roleHistory.onStart(fs, historyDir);
-    
+
     //rebuild any live containers
     rebuildModelFromRestart(liveContainers);
-    
+
+    // any am config options to pick up
+
+    logServerURL = appmasterConfig.get(YarnConfiguration.YARN_LOG_SERVER_URL, "");
     //mark as live
     applicationLive = true;
   }
@@ -611,7 +636,7 @@
   }
   
   /**
-   * The resource configuration is updated -review and update state
+   * The resource configuration is updated -review and update state.
    * @param resources updated resources specification
    */
   public synchronized void updateResourceDefinitions(ConfTree resources) throws
@@ -690,8 +715,9 @@
                                    roleStatusMap.get(priority));
     }
     roleStatusMap.put(priority,
-                      new RoleStatus(providerRole));
+        new RoleStatus(providerRole));
     roles.put(providerRole.name, providerRole);
+    rolePriorityMap.put(priority, providerRole);
   }
 
   /**
@@ -766,41 +792,111 @@
   }
 
 
-  public synchronized List<RoleInstance> cloneActiveContainerList() {
-    Collection<RoleInstance> values = activeContainers.values();
-    return new ArrayList<>(values);
+  /**
+   * Clone the list of active (==owned) containers
+   * @return the list of role instances representing all owned containers
+   */
+  public synchronized List<RoleInstance> cloneOwnedContainerList() {
+    Collection<RoleInstance> values = ownedContainers.values();
+    return new ArrayList<RoleInstance>(values);
+  }
+
+  /**
+   * Get the number of active (==owned) containers
+   * @return the number of owned containers
+   */
+  public int getNumOwnedContainers() {
+    return ownedContainers.size();
   }
   
-
-  public int getNumActiveContainers() {
-    return activeContainers.size();
-  }
-  
-
-  public RoleInstance getActiveContainer(ContainerId id) {
-    return activeContainers.get(id);
+  /**
+   * Look up an active container: any container that the AM has, even
+   * if it is not currently running/live
+   */
+  public RoleInstance getOwnedContainer(ContainerId id) {
+    return ownedContainers.get(id);
   }
 
+  /**
+   * Remove an owned container
+   * @param id container ID
+   * @return the instance removed
+   */
+  private RoleInstance removeOwnedContainer(ContainerId id) {
+    return ownedContainers.remove(id);
+  }
 
+  /**
+   * set/update an owned container
+   * @param id container ID
+   * @param instance role instance to associate with the container
+   * @return the previous instance mapped to that container ID, or null if none
+   */
+  private RoleInstance putOwnedContainer(ContainerId id,
+      RoleInstance instance) {
+    return ownedContainers.put(id, instance);
+  }
+
+  /**
+   * Clone the live container list. This is synchronized.
+   * @return a snapshot of the live node list
+   */
   public synchronized List<RoleInstance> cloneLiveContainerInfoList() {
     List<RoleInstance> allRoleInstances;
     Collection<RoleInstance> values = getLiveNodes().values();
-    allRoleInstances = new ArrayList<>(values);
+    allRoleInstances = new ArrayList<RoleInstance>(values);
     return allRoleInstances;
   }
 
-
-
+  /**
+   * Lookup live instance by string value of container ID
+   * @param containerId container ID as a string
+   * @return the role instance for that container
+   * @throws NoSuchNodeException if it does not exist
+   */
   public synchronized RoleInstance getLiveInstanceByContainerID(String containerId)
-    throws NoSuchNodeException {
+      throws NoSuchNodeException {
     Collection<RoleInstance> nodes = getLiveNodes().values();
+    return findNodeInCollection(containerId, nodes);
+  }
+
+  /**
+   * Lookup owned instance by string value of container ID
+   * @param containerId container ID as a string
+   * @return the role instance for that container
+   * @throws NoSuchNodeException if it does not exist
+   */
+  public synchronized RoleInstance getOwnedInstanceByContainerID(String containerId)
+      throws NoSuchNodeException {
+    Collection<RoleInstance> nodes = ownedContainers.values();
+    return findNodeInCollection(containerId, nodes);
+  }
+
+  
+  
+  /**
+   * Iterate through a collection of role instances to find one with a
+   * specific (string) container ID
+   * @param containerId container ID as a string
+   * @param nodes collection
+   * @return the matching role instance
+   * @throws NoSuchNodeException if there was no match
+   */
+  private RoleInstance findNodeInCollection(String containerId,
+      Collection<RoleInstance> nodes) throws NoSuchNodeException {
+    RoleInstance found = null;
     for (RoleInstance node : nodes) {
       if (containerId.equals(node.id)) {
-        return node;
+        found = node;
+        break;
       }
     }
-    //at this point: no node
-    throw new NoSuchNodeException(containerId);
+    if (found != null) {
+      return found;
+    } else {
+      //at this point: no node
+      throw new NoSuchNodeException(containerId);
+    }
   }
 
 
@@ -808,7 +904,7 @@
     Collection<String> containerIDs) {
     //first, a hashmap of those containerIDs is built up
     Set<String> uuidSet = new HashSet<String>(containerIDs);
-    List<RoleInstance> nodes = new ArrayList<>(uuidSet.size());
+    List<RoleInstance> nodes = new ArrayList<RoleInstance>(uuidSet.size());
     Collection<RoleInstance> clusterNodes = getLiveNodes().values();
 
     for (RoleInstance node : clusterNodes) {
@@ -826,7 +922,7 @@
    * @return a list of nodes, may be empty
    */
   public synchronized List<RoleInstance> enumLiveNodesInRole(String role) {
-    List<RoleInstance> nodes = new ArrayList<>();
+    List<RoleInstance> nodes = new ArrayList<RoleInstance>();
     Collection<RoleInstance> allRoleInstances = getLiveNodes().values();
     for (RoleInstance node : allRoleInstances) {
       if (role.isEmpty() || role.equals(node.role)) {
@@ -836,17 +932,38 @@
     return nodes;
   }
 
+ 
+  /**
+   * enum nodes by role ID, from either the active or live node list
+   * @param roleId role the container must be in
+   * @param active flag to indicate "use active list" rather than the smaller
+   * "live" list
+   * @return a list of nodes, may be empty
+   */
+  public synchronized List<RoleInstance> enumNodesWithRoleId(int roleId,
+      boolean active) {
+    List<RoleInstance> nodes = new ArrayList<RoleInstance>();
+    Collection<RoleInstance> allRoleInstances;
+    allRoleInstances = active? ownedContainers.values() : liveNodes.values();
+    for (RoleInstance node : allRoleInstances) {
+      if (node.roleId == roleId) {
+        nodes.add(node);
+      }
+    }
+    return nodes;
+  }
+
 
   /**
    * Build an instance map.
    * @return the map of Role name to list of role instances
    */
   private synchronized Map<String, List<String>> createRoleToInstanceMap() {
-    Map<String, List<String>> map = new HashMap<>();
+    Map<String, List<String>> map = new HashMap<String, List<String>>();
     for (RoleInstance node : getLiveNodes().values()) {
       List<String> containers = map.get(node.role);
       if (containers == null) {
-        containers = new ArrayList<>();
+        containers = new ArrayList<String>();
         map.put(node.role, containers);
       }
       containers.add(node.id);
@@ -858,12 +975,12 @@
    * @return the map of Role name to list of Cluster Nodes, ready
    */
   private synchronized Map<String, Map<String, ClusterNode>> createRoleToClusterNodeMap() {
-    Map<String, Map<String, ClusterNode>> map = new HashMap<>();
+    Map<String, Map<String, ClusterNode>> map = new HashMap<String, Map<String, ClusterNode>>();
     for (RoleInstance node : getLiveNodes().values()) {
       
       Map<String, ClusterNode> containers = map.get(node.role);
       if (containers == null) {
-        containers = new HashMap<>();
+        containers = new HashMap<String, ClusterNode>();
         map.put(node.role, containers);
       }
       Messages.RoleInstanceState pbuf = node.toProtobuf();
@@ -885,7 +1002,7 @@
     instance.container = container;
     instance.createTime = now();
     getStartingNodes().put(container.getId(), instance);
-    activeContainers.put(container.getId(), instance);
+    putOwnedContainer(container.getId(), instance);
     roleHistory.onContainerStartSubmitted(container, instance);
   }
 
@@ -902,19 +1019,19 @@
       throws SliderInternalStateException {
     ContainerId id = container.getId();
     //look up the container
-    RoleInstance info = getActiveContainer(id);
-    if (info == null) {
+    RoleInstance instance = getOwnedContainer(id);
+    if (instance == null) {
       throw new SliderInternalStateException(
-        "No active container with ID " + id.toString());
+        "No active container with ID " + id);
     }
     //verify that it isn't already released
     if (containersBeingReleased.containsKey(id)) {
       throw new SliderInternalStateException(
         "Container %s already queued for release", id);
     }
-    info.released = true;
-    containersBeingReleased.put(id, info.container);
-    RoleStatus role = lookupRoleStatus(info.roleId);
+    instance.released = true;
+    containersBeingReleased.put(id, instance.container);
+    RoleStatus role = lookupRoleStatus(instance.roleId);
     role.incReleasing();
     roleHistory.onContainerReleaseSubmitted(container);
   }
@@ -1052,7 +1169,7 @@
   @VisibleForTesting
   public RoleInstance innerOnNodeManagerContainerStarted(ContainerId containerId) {
     incStartedCountainerCount();
-    RoleInstance instance = activeContainers.get(containerId);
+    RoleInstance instance = getOwnedContainer(containerId);
     if (instance == null) {
       //serious problem
       throw new YarnRuntimeException("Container not in active containers start "+
@@ -1088,17 +1205,20 @@
    */
   public synchronized void onNodeManagerContainerStartFailed(ContainerId containerId,
                                                              Throwable thrown) {
-    activeContainers.remove(containerId);
+    removeOwnedContainer(containerId);
     incFailedCountainerCount();
     incStartFailedCountainerCount();
     RoleInstance instance = getStartingNodes().remove(containerId);
     if (null != instance) {
       RoleStatus roleStatus = lookupRoleStatus(instance.roleId);
+      String text;
       if (null != thrown) {
-        instance.diagnostics = SliderUtils.stringify(thrown);
+        text = SliderUtils.stringify(thrown);
+      } else {
+        text = "container start failure";
       }
-      roleStatus.noteFailed(null);
-      roleStatus.incStartFailed(); 
+      instance.diagnostics = text;
+      roleStatus.noteFailed(true, null);
       getFailedNodes().put(containerId, instance);
       roleHistory.onNodeManagerContainerStartFailed(instance.container);
     }
@@ -1161,31 +1281,23 @@
    * @return NodeCompletionResult
    */
   public synchronized NodeCompletionResult onCompletedNode(ContainerStatus status) {
-    return onCompletedNode(null, status);
-  }
-  
-  /**
-   * handle completed node in the CD -move something from the live
-   * server list to the completed server list
-   * @param amConf YarnConfiguration
-   * @param status the node that has just completed
-   * @return NodeCompletionResult
-   */
-  public synchronized NodeCompletionResult onCompletedNode(Configuration amConf,
-      ContainerStatus status) {
     ContainerId containerId = status.getContainerId();
     NodeCompletionResult result = new NodeCompletionResult();
     RoleInstance roleInstance;
 
     if (containersBeingReleased.containsKey(containerId)) {
-      log.info("Container was queued for release");
+      log.info("Container was queued for release : {}", containerId);
       Container container = containersBeingReleased.remove(containerId);
       RoleStatus roleStatus = lookupRoleStatus(container);
-      log.info("decrementing role count for role {}", roleStatus.getName());
-      roleStatus.decReleasing();
-      roleStatus.decActual();
-      roleStatus.incCompleted();
-      roleHistory.onReleaseCompleted(container);
+      int releasing = roleStatus.decReleasing();
+      int actual = roleStatus.decActual();
+      int completedCount = roleStatus.incCompleted();
+      log.info("decrementing role count for role {} to {}; releasing={}, completed={}",
+          roleStatus.getName(),
+          actual,
+          releasing,
+          completedCount);
+      roleHistory.onReleaseCompleted(container, true);
 
     } else if (surplusNodes.remove(containerId)) {
       //its a surplus one being purged
@@ -1193,7 +1305,7 @@
     } else {
       //a container has failed 
       result.containerFailed = true;
-      roleInstance = activeContainers.remove(containerId);
+      roleInstance = removeOwnedContainer(containerId);
       if (roleInstance != null) {
         //it was active, move it to failed 
         incFailedCountainerCount();
@@ -1205,43 +1317,30 @@
       }
       if (roleInstance != null) {
         int roleId = roleInstance.roleId;
-        log.info("Failed container in role {}", roleId);
+        String rolename = roleInstance.role;
+        log.info("Failed container in role[{}] : {}", roleId, rolename);
         try {
           RoleStatus roleStatus = lookupRoleStatus(roleId);
           roleStatus.decActual();
           boolean shortLived = isShortLived(roleInstance);
           String message;
-          if (roleInstance.container != null) {
-            String user = null;
-            try {
-              user = SliderUtils.getCurrentUser().getShortUserName();
-            } catch (IOException ignored) {
-            }
-            String completedLogsUrl = null;
-            Container c = roleInstance.container;
-            String url = null;
-            if (amConf != null) {
-              url = amConf.get(YarnConfiguration.YARN_LOG_SERVER_URL);
-            }
-            if (user != null && url != null) {
-              completedLogsUrl = url
-                  + "/" + c.getNodeId() + "/" + roleInstance.getContainerId() + "/ctx/" + user;
-            }
-            message = String.format("Failure %s on host %s" +
-                (completedLogsUrl != null ? ", see %s" : ""), roleInstance.getContainerId(),
-                c.getNodeId().getHost(), completedLogsUrl);
-          } else {
-            message = String.format("Failure %s",
-                                    containerId.toString());
-          }
-          roleStatus.noteFailed(message);
-          //have a look to see if it short lived
-          if (shortLived) {
-            roleStatus.incStartFailed();
-          }
+          Container failedContainer = roleInstance.container;
           
-          if (roleInstance.container != null) {
-            roleHistory.onFailedContainer(roleInstance.container, shortLived);
+          //build the failure message
+          if (failedContainer != null) {
+            String completedLogsUrl = getLogsURLForContainer(failedContainer);
+            message = String.format("Failure %s on host %s: %s",
+                roleInstance.getContainerId().toString(),
+                failedContainer.getNodeId().getHost(),
+                completedLogsUrl);
+          } else {
+            message = String.format("Failure %s", containerId);
+          }
+          int failed = roleStatus.noteFailed(shortLived, message);
+          log.info("Current count of failed role[{}] {} =  {}",
+              roleId, rolename, failed);
+          if (failedContainer != null) {
+            roleHistory.onFailedContainer(failedContainer, shortLived);
           }
           
         } catch (YarnRuntimeException e1) {
@@ -1255,30 +1354,67 @@
         completionOfUnknownContainerEvent.incrementAndGet();
       }
     }
-    
+
     if (result.surplusNode) {
       //a surplus node
       return result;
     }
-    
+
     //record the complete node's details; this pulls it from the livenode set 
     //remove the node
     ContainerId id = status.getContainerId();
+    log.info("Removing node ID {}", id);
     RoleInstance node = getLiveNodes().remove(id);
-    if (node == null) {
-      log.warn("Received notification of completion of unknown node {}", id);
-      completionOfNodeNotInLiveListEvent.incrementAndGet();
-
-    } else {
+    if (node != null) {
       node.state = ClusterDescription.STATE_DESTROYED;
       node.exitCode = status.getExitStatus();
       node.diagnostics = status.getDiagnostics();
       getCompletedNodes().put(id, node);
       result.roleInstance = node;
+    } else {
+      // not in the list
+      log.warn("Received notification of completion of unknown node {}", id);
+      completionOfNodeNotInLiveListEvent.incrementAndGet();
+
     }
+    
+    // and the active node list if present
+    removeOwnedContainer(containerId);
+    
+    // finally, verify the node doesn't exist any more
+    assert !containersBeingReleased.containsKey(
+        containerId) : "container still in release queue";
+    assert !getLiveNodes().containsKey(
+        containerId) : " container still in live nodes";
+    assert getOwnedContainer(containerId) ==
+           null : "Container still in active container list";
+
     return result;
   }
 
+  /**
+   * Get the logs URL for a container
+   * @param c container
+   * @return the URL, "" if it cannot be determined, or null if the container is null
+   */
+  protected String getLogsURLForContainer(Container c) {
+    if (c==null) {
+      return null;
+    }
+    String user = null;
+    try {
+      user = SliderUtils.getCurrentUser().getShortUserName();
+    } catch (IOException ignored) {
+    }
+    String completedLogsUrl = "";
+    String url = logServerURL;
+    if (user != null && SliderUtils.isSet(url)) {
+      completedLogsUrl = url
+          + "/" + c.getNodeId() + "/" + c.getId() + "/ctx/" + user;
+    }
+    return completedLogsUrl;
+  }
+
 
   /**
    * Return the percentage done that Slider is to have YARN display in its
@@ -1310,7 +1446,7 @@
    * Update the cluster description with anything interesting
    * @param providerStatus status from the provider for the cluster info section
    */
-  public void refreshClusterStatus(Map<String, String> providerStatus) {
+  public synchronized ClusterDescription refreshClusterStatus(Map<String, String> providerStatus) {
     ClusterDescription cd = getClusterStatus();
     long now = now();
     cd.setInfoTime(StatusKeys.INFO_STATUS_TIME_HUMAN,
@@ -1321,10 +1457,10 @@
         cd.setInfo(entry.getKey(),entry.getValue());
       }
     }
-    MapOperations infoOps = new MapOperations("info",cd.info);
+    MapOperations infoOps = new MapOperations("info", cd.info);
     infoOps.mergeWithoutOverwrite(applicationInfo);
     SliderUtils.addBuildInfo(infoOps, "status");
-    cd.statistics = new HashMap<>();
+    cd.statistics = new HashMap<String, Map<String, Integer>>();
 
     // build the map of node -> container IDs
     Map<String, List<String>> instanceMap = createRoleToInstanceMap();
@@ -1333,7 +1469,7 @@
     //build the map of node -> containers
     Map<String, Map<String, ClusterNode>> clusterNodes =
       createRoleToClusterNodeMap();
-    cd.status = new HashMap<>();
+    cd.status = new HashMap<String, Object>();
     cd.status.put(ClusterDescriptionKeys.KEY_CLUSTER_LIVE, clusterNodes);
 
 
@@ -1352,7 +1488,7 @@
       cd.statistics.put(rolename, stats);
     }
 
-    Map<String, Integer> sliderstats = new HashMap<>();
+    Map<String, Integer> sliderstats = new HashMap<String, Integer>();
     sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_COMPLETED,
         completedContainerCount.get());
     sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_FAILED,
@@ -1367,7 +1503,7 @@
     sliderstats.put(StatusKeys.STATISTICS_CONTAINERS_UNKNOWN_COMPLETED,
         completionOfUnknownContainerEvent.get());
     cd.statistics.put(SliderKeys.COMPONENT_AM, sliderstats);
-    
+    return cd;
   }
 
   /**
@@ -1376,7 +1512,7 @@
   public synchronized List<AbstractRMOperation> reviewRequestAndReleaseNodes()
       throws SliderInternalStateException, TriggerClusterTeardownException {
     log.debug("in reviewRequestAndReleaseNodes()");
-    List<AbstractRMOperation> allOperations = new ArrayList<>();
+    List<AbstractRMOperation> allOperations = new ArrayList<AbstractRMOperation>();
     for (RoleStatus roleStatus : getRoleStatusMap().values()) {
       if (!roleStatus.getExcludeFromFlexing()) {
         List<AbstractRMOperation> operations = reviewOneRole(roleStatus);
@@ -1385,23 +1521,58 @@
     }
     return allOperations;
   }
-  
-  public void checkFailureThreshold(RoleStatus role) throws
-                                                        TriggerClusterTeardownException {
-    int failures = role.getFailed();
 
-    if (failures > failureThreshold) {
+  /**
+   * Check the failure threshold for a role
+   * @param role role to examine
+   * @throws TriggerClusterTeardownException if the role
+   * has failed too many times
+   */
+  private void checkFailureThreshold(RoleStatus role)
+      throws TriggerClusterTeardownException {
+    int failures = role.getFailed();
+    int threshold = getFailureThresholdForRole(role);
+    log.debug("Failure count of role: {}: {}, threshold={}",
+        role.getName(), failures, threshold);
+
+    if (failures > threshold) {
       throw new TriggerClusterTeardownException(
         SliderExitCodes.EXIT_DEPLOYMENT_FAILED,
         ErrorStrings.E_UNSTABLE_CLUSTER +
-        " - failed with role %s failing %d times (%d in startup); threshold is %d - last failure: %s",
+        " - failed with role %s failing %d times (%d in startup);" +
+        " threshold is %d - last failure: %s",
         role.getName(),
         role.getFailed(),
         role.getStartFailed(),
-        failureThreshold,
+          threshold,
         role.getFailureMessage());
     }
   }
+
+  /**
+   * Get the failure threshold for a specific role, falling back to
+   * the global one if not
+   * @param roleStatus
+   * @return the threshold for failures
+   */
+  private int getFailureThresholdForRole(RoleStatus roleStatus) {
+    ConfTreeOperations resources =
+        instanceDefinition.getResourceOperations();
+    return resources.getComponentOptInt(roleStatus.getName(),
+        ResourceKeys.CONTAINER_FAILURE_SHORTLIFE,
+        failureThreshold);
+  }
+  
+  /**
+   * Reset the failure counts of all roles
+   */
+  public void resetFailureCounts() {
+    for (RoleStatus roleStatus : getRoleStatusMap().values()) {
+      int failed = roleStatus.resetFailed();
+      log.debug("Resetting failure count of {}; was {}", roleStatus.getName(),
+          failed);
+    }
+  }
   
   /**
    * Look at the allocation status of one role, and trigger add/release
@@ -1412,9 +1583,10 @@
    * @throws SliderInternalStateException if the operation reveals that
    * the internal state of the application is inconsistent.
    */
-  public List<AbstractRMOperation> reviewOneRole(RoleStatus role)
+  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
+  private List<AbstractRMOperation> reviewOneRole(RoleStatus role)
       throws SliderInternalStateException, TriggerClusterTeardownException {
-    List<AbstractRMOperation> operations = new ArrayList<>();
+    List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
     int delta;
     String details;
     int expected;
@@ -1457,18 +1629,43 @@
 
       // get the nodes to release
       int roleId = role.getKey();
-      List<NodeInstance> nodesForRelease =
-        roleHistory.findNodesForRelease(roleId, excess);
-      
-      for (NodeInstance node : nodesForRelease) {
-        RoleInstance possible = findRoleInstanceOnHost(node, roleId);
-        if (possible == null) {
-          throw new SliderInternalStateException(
-            "Failed to find a container to release on node %s", node.hostname);
-        }
-        containerReleaseSubmitted(possible.container);
-        operations.add(new ContainerReleaseOperation(possible.getId()));
+            
+      // enum all active nodes that aren't being released
+      List<RoleInstance> containersToRelease = enumNodesWithRoleId(roleId, true);
 
+      // cut all release-in-progress nodes
+      ListIterator<RoleInstance> li = containersToRelease.listIterator();
+      while (li.hasNext()) {
+        RoleInstance next = li.next();
+        if (next.released) {
+          li.remove();
+        }
+      }
+
+      // warn if the desired state can't be reached
+      if (containersToRelease.size() < excess) {
+        log.warn("Not enough nodes to release...short of {} nodes",
+            containersToRelease.size() - excess);
+      }
+      
+      // ask the release selector to sort the targets
+      containersToRelease =  containerReleaseSelector.sortCandidates(
+          roleId,
+          containersToRelease,
+          excess);
+      
+      //crop to the excess
+
+      List<RoleInstance> finalCandidates = (excess < containersToRelease.size()) 
+          ? containersToRelease.subList(0, excess)
+          : containersToRelease;
+      
+
+      // then build up a release operation, logging each container as released
+      for (RoleInstance possible : finalCandidates) {
+        log.debug("Targeting for release: {}", possible);
+        containerReleaseSubmitted(possible.container);
+        operations.add(new ContainerReleaseOperation(possible.getId()));       
       }
    
     }
@@ -1482,12 +1679,12 @@
    * @return
    * @throws SliderInternalStateException
    */
-  public List<AbstractRMOperation> releaseContainer(String containerId)
+  public List<AbstractRMOperation> releaseContainer(ContainerId containerId)
       throws SliderInternalStateException {
-    List<AbstractRMOperation> operations = new ArrayList<>();
-    List<RoleInstance> activeRoleInstances = cloneActiveContainerList();
+    List<AbstractRMOperation> operations = new ArrayList<AbstractRMOperation>();
+    List<RoleInstance> activeRoleInstances = cloneOwnedContainerList();
     for (RoleInstance role : activeRoleInstances) {
-      if (role.container.getId().toString().equals(containerId)) {
+      if (role.container.getId().equals(containerId)) {
         containerReleaseSubmitted(role.container);
         operations.add(new ContainerReleaseOperation(role.getId()));
       }
@@ -1507,7 +1704,7 @@
    * that can be released.
    */
   private RoleInstance findRoleInstanceOnHost(NodeInstance node, int roleId) {
-    Collection<RoleInstance> targets = cloneActiveContainerList();
+    Collection<RoleInstance> targets = cloneOwnedContainerList();
     String hostname = node.hostname;
     for (RoleInstance ri : targets) {
       if (hostname.equals(RoleHistoryUtils.hostnameOf(ri.container))
@@ -1525,14 +1722,16 @@
    */
   public synchronized List<AbstractRMOperation> releaseAllContainers() {
 
-    Collection<RoleInstance> targets = cloneActiveContainerList();
+    Collection<RoleInstance> targets = cloneOwnedContainerList();
     log.info("Releasing {} containers", targets.size());
     List<AbstractRMOperation> operations =
-      new ArrayList<>(targets.size());
+      new ArrayList<AbstractRMOperation>(targets.size());
     for (RoleInstance instance : targets) {
       Container possible = instance.container;
       ContainerId id = possible.getId();
       if (!instance.released) {
+        String url = getLogsURLForContainer(possible);
+        log.info("Releasing container. Log: " + url);
         try {
           containerReleaseSubmitted(possible);
         } catch (SliderInternalStateException e) {
@@ -1677,7 +1876,7 @@
     instance.container = container;
     instance.createTime = now();
     instance.state = ClusterDescription.STATE_LIVE;
-    activeContainers.put(cid, instance);
+    putOwnedContainer(cid, instance);
     //role history gets told
     roleHistory.onContainerAssigned(container);
     // pretend the container has just had its start actions submitted
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
similarity index 64%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
index e3e595f..0cbc134 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ContainerReleaseSelector.java
@@ -18,14 +18,21 @@
 
 package org.apache.slider.server.appmaster.state;
 
-public class AbstractRMOperation {
+import java.util.List;
+
+/**
+ * Interface implemented by anything that must choose containers to release
+ * 
+ */
+public interface ContainerReleaseSelector {
 
   /**
-   * Execute the operation
-   * @param asyncRMClient client
+   * Given a list of candidate containers, return a sorted version of the priority
+   * in which they should be released. 
+   * @param candidates candidate list ... everything considered suitable
+   * @return
    */
-  public void execute(RMOperationHandler handler) {
-
-  }
-  
+  List<RoleInstance> sortCandidates(int roleId,
+      List<RoleInstance> candidates,
+      int minimumToSelect);
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java
new file mode 100644
index 0000000..841dda3
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/MostRecentContainerReleaseSelector.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.state;
+
+import org.apache.slider.common.tools.Comparators;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Sort the candidate list by the most recent container first.
+ */
+public class MostRecentContainerReleaseSelector implements ContainerReleaseSelector {
+
+  @Override
+  public List<RoleInstance> sortCandidates(int roleId,
+      List<RoleInstance> candidates,
+      int minimumToSelect) {
+    Collections.sort(candidates, new newerThan());
+    return candidates;
+  }
+
+  private static class newerThan implements Comparator<RoleInstance>, Serializable {
+    private final Comparator<Long> innerComparator =
+        new Comparators.ComparatorReverser<Long>(new Comparators.LongComparator());
+    public int compare(RoleInstance o1, RoleInstance o2) {
+      return innerComparator.compare(o1.createTime, o2.createTime);
+
+    }
+    
+  }
+  
+  
+}
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
index a9e5a8c..c8ab2a7 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
@@ -169,7 +169,6 @@
    * Release an instance -which is no longer marked as active
    */
   public synchronized void release() {
-    assert live > 0 : "no live nodes to release";
     releasing++;
   }
 
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index 06375fb..1ba2282 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -41,7 +41,7 @@
    */
   public NodeInstance(String hostname, int roles) {
     this.hostname = hostname;
-    nodeEntries = new ArrayList<>(roles);
+    nodeEntries = new ArrayList<NodeEntry>(roles);
   }
 
   /**
@@ -75,7 +75,7 @@
   }
 
   /**
-   * Cout the number of active role instances on this node
+   * Count the number of active role instances on this node
    * @param role role index
    * @return 0 if there are none, otherwise the #of nodes that are running and
    * not being released already.
@@ -84,6 +84,16 @@
     NodeEntry nodeEntry = get(role);
     return (nodeEntry != null ) ? nodeEntry.getActive() : 0;
   }
+  
+  /**
+   * Count the number of live role instances on this node
+   * @param role role index
+   * @return 0 if there are none, otherwise the #of nodes that are running 
+   */
+  public int getLiveRoleInstances(int role) {
+    NodeEntry nodeEntry = get(role);
+    return (nodeEntry != null ) ? nodeEntry.getLive() : 0;
+  }
 
   /**
    * Get the entry for a role -and remove it if present
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index 32b1656..570c194 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -73,7 +73,7 @@
    * in that role
    */
   public List<NodeInstance> listActiveNodes(int role) {
-    List<NodeInstance> nodes = new ArrayList<>();
+    List<NodeInstance> nodes = new ArrayList<NodeInstance>();
     for (NodeInstance instance : values()) {
       if (instance.getActiveRoleInstances(role) > 0) {
         nodes.add(instance);
@@ -102,60 +102,6 @@
     }
     return purged;
   }
-  
-  
-
-  /**
-   * Find a list of node for release; algorithm may make its own
-   * decisions on which to release.
-   * @param role role index
-   * @param count number of nodes to release
-   * @return a possibly empty list of nodes.
-   */
-  public List<NodeInstance> findNodesForRelease(int role, int count) {
-    List<NodeInstance> targets = new ArrayList<>(count);
-    List<NodeInstance> active = listActiveNodes(role);
-    List<NodeInstance> multiple = new ArrayList<>();
-    int nodesRemaining = count;
-    log.debug("searching for {} nodes with candidate set size {}",
-              nodesRemaining, active.size());
-    ListIterator<NodeInstance> it = active.listIterator();
-    while (it.hasNext() && nodesRemaining > 0) {
-      NodeInstance ni = it.next();
-      int load = ni.getActiveRoleInstances(role);
-      log.debug("Node {} load={}", ni, load);
-      assert load != 0; 
-      if (load == 1) {
-        // at the tail of the list, from here active[*] is a load=1 entry
-        break;
-      }
-      // load is >1. Add an entry to the target list FOR EACH INSTANCE ABOVE 1
-      for (int i = 0; i < (load - 1) && nodesRemaining > 0; i++) {
-        nodesRemaining--;
-        log.debug("Push {} #{}", ni, i);
-        targets.add(ni);
-      }
-      // and add to the multiple list
-      multiple.add(ni);
-      // then pop it from the active list
-      it.remove();
-    }
-    //here either the number is found or there is still some left.
-
-    if (nodesRemaining > 0) {
-      // leftovers. Append any of the multiple node entries to the tail of 
-      // the active list (so they get chosen last)
-      active.addAll(multiple);
-      // all the entries in the list have exactly one node
-      // so ask for as many as are needed
-      int ask = Math.min(nodesRemaining, active.size());
-      log.debug("load=1 nodes to select={} multiples={} available={} ask={}",
-                nodesRemaining, multiple.size(),active.size(), ask);
-      targets.addAll(active.subList(0, ask));
-    }
-    return targets;
-  }
-
 
   /**
    * Clone point
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index fa2c754..d847962 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -43,7 +43,7 @@
     LoggerFactory.getLogger(OutstandingRequestTracker.class);
 
   private Map<OutstandingRequest, OutstandingRequest> requests =
-    new HashMap<>();
+    new HashMap<OutstandingRequest, OutstandingRequest>();
 
   /**
    * Create a new request for the specific role. If a
@@ -161,7 +161,7 @@
    * @return possibly empty list of hostnames
    */
   public synchronized List<NodeInstance> cancelOutstandingRequests(int role) {
-    List<NodeInstance> hosts = new ArrayList<>();
+    List<NodeInstance> hosts = new ArrayList<NodeInstance>();
     Iterator<Map.Entry<OutstandingRequest,OutstandingRequest>> iterator =
       requests.entrySet().iterator();
     while (iterator.hasNext()) {
@@ -178,6 +178,6 @@
   }
   
   public synchronized List<OutstandingRequest> listOutstandingRequests() {
-    return new ArrayList<>(requests.values());
+    return new ArrayList<OutstandingRequest>(requests.values());
   }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
index 6caf1a9..a0871ae 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
@@ -39,7 +39,7 @@
 
 
   private final Map<String, PublishedConfigSet> publishedConfigSets =
-      new ConcurrentHashMap<>(5);
+      new ConcurrentHashMap<String, PublishedConfigSet>(5);
   private static final PatternValidator validator = new PatternValidator(
       RestPaths.PUBLISHED_CONFIGURATION_SET_REGEXP);
   private String applicationName;
@@ -92,7 +92,7 @@
   public List<String> listConfigSets() {
 
     synchronized (publishedConfigSets) {
-      List<String> sets = new ArrayList<>(publishedConfigSets.keySet());
+      List<String> sets = new ArrayList<String>(publishedConfigSets.keySet());
       return sets;
     }
   }
@@ -164,18 +164,23 @@
   }
 
   @Override
-  public List<RoleInstance> cloneActiveContainerList() {
-    return appState.cloneActiveContainerList();
+  public List<RoleInstance> cloneOwnedContainerList() {
+    return appState.cloneOwnedContainerList();
   }
 
   @Override
-  public int getNumActiveContainers() {
-    return appState.getNumActiveContainers();
+  public int getNumOwnedContainers() {
+    return appState.getNumOwnedContainers();
   }
 
   @Override
-  public RoleInstance getActiveContainer(ContainerId id) {
-    return appState.getActiveContainer(id);
+  public RoleInstance getOwnedContainer(ContainerId id) {
+    return appState.getOwnedContainer(id);
+  }
+
+  @Override
+  public RoleInstance getOwnedContainer(String id) throws NoSuchNodeException {
+    return appState.getOwnedInstanceByContainerID(id);
   }
 
   @Override
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 0cd2b39..edcf7ea 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -29,7 +29,6 @@
 import org.apache.slider.providers.ProviderRole;
 import org.apache.slider.server.avro.RoleHistoryHeader;
 import org.apache.slider.server.avro.RoleHistoryWriter;
-import org.mortbay.log.Log;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -61,7 +60,7 @@
     LoggerFactory.getLogger(RoleHistory.class);
   private final List<ProviderRole> providerRoles;
   private final Map<String, ProviderRole> providerRoleMap =
-    new HashMap<>();
+    new HashMap<String, ProviderRole>();
   private long startTime;
   /**
    * Time when saved
@@ -108,9 +107,8 @@
     nodemap = new NodeMap(roleSize);
     resetAvailableNodeLists();
 
-    resetAvailableNodeLists();
     outstandingRequests = new OutstandingRequestTracker();
-    Map<Integer, RoleStatus> roleStats = new HashMap<>();
+    Map<Integer, RoleStatus> roleStats = new HashMap<Integer, RoleStatus>();
 
 
     for (ProviderRole providerRole : providerRoles) {
@@ -142,7 +140,7 @@
    */
   public void addNewProviderRole(ProviderRole providerRole)
     throws BadConfigException {
-    Map<Integer, RoleStatus> roleStats = new HashMap<>();
+    Map<Integer, RoleStatus> roleStats = new HashMap<Integer, RoleStatus>();
 
 
     for (ProviderRole role : providerRoles) {
@@ -156,7 +154,7 @@
    * Clear the lists of available nodes
    */
   private synchronized void resetAvailableNodeLists() {
-    availableNodes = new HashMap<>(roleSize);
+    availableNodes = new HashMap<Integer, LinkedList<NodeInstance>>(roleSize);
   }
 
   /**
@@ -434,7 +432,7 @@
   private LinkedList<NodeInstance> getOrCreateNodesForRoleId(int id) {
     LinkedList<NodeInstance> instances = availableNodes.get(id);
     if (instances == null) {
-      instances = new LinkedList<>();
+      instances = new LinkedList<NodeInstance>();
       availableNodes.put(id, instances);
     }
     return instances;
@@ -479,7 +477,7 @@
       }
     }
     if (nodeInstance == null) {
-      log.debug("No node selected for {}", role.getName());
+      log.debug("No historical node found for {}", role.getName());
     }
     return nodeInstance;
   }
@@ -518,18 +516,16 @@
     return requestInstanceOnNode(node, role, resource);
   }
 
-
   /**
-   * Find a list of node for release; algorithm may make its own
-   * decisions on which to release.
+   * Get the list of active nodes ... walks the node  map so 
+   * is O(nodes)
    * @param role role index
-   * @param count number of nodes to release
-   * @return a possibly empty list of nodes.
+   * @return a possibly empty list of nodes with an instance of that node
    */
-  public synchronized List<NodeInstance> findNodesForRelease(int role, int count) {
-    return nodemap.findNodesForRelease(role, count);
+  public synchronized List<NodeInstance> listActiveNodes(int role) {
+    return nodemap.listActiveNodes(role);
   }
- 
+  
   /**
    * Get the node entry of a container
    * @param container container to look up
@@ -572,7 +568,7 @@
     List<Container> requested =
       new ArrayList<Container>(allocatedContainers.size());
     List<Container> unrequested =
-      new ArrayList<>(allocatedContainers.size());
+      new ArrayList<Container>(allocatedContainers.size());
     outstandingRequests.partitionRequests(this, allocatedContainers, requested, unrequested);
     
     //give the unrequested ones lower priority
@@ -659,10 +655,11 @@
   /**
    * App state notified of a container completed 
    * @param container completed container
+   * @param wasReleased
    * @return true if the node was queued
    */
-  public boolean onReleaseCompleted(Container container) {
-    return markContainerFinished(container, true, false);
+  public boolean onReleaseCompleted(Container container, boolean wasReleased) {
+    return markContainerFinished(container, wasReleased, false);
   }
 
   /**
@@ -750,7 +747,7 @@
    */
   @VisibleForTesting
   public List<NodeInstance> cloneAvailableList(int role) {
-    return new LinkedList<>(getOrCreateNodesForRoleId(role));
+    return new LinkedList<NodeInstance>(getOrCreateNodesForRoleId(role));
   }
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
index 205edea..e373843 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java
@@ -25,8 +25,12 @@
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.proto.Messages;
 import org.apache.slider.common.tools.SliderUtils;
+import org.apache.slider.core.registry.info.RegisteredEndpoint;
 
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 /**
  * Tracking information about a container
@@ -82,11 +86,12 @@
   public String host;
   public String hostURL;
 
+
   /**
-   * Any information the provider wishes to retain on the state of
-   * an instance
+   * A list of registered endpoints.
    */
-  public Object providerInfo;
+  private List<RegisteredEndpoint> endpoints =
+      new ArrayList<RegisteredEndpoint>(2);
 
   public RoleInstance(Container container) {
     Preconditions.checkNotNull(container, "Null container");
@@ -115,12 +120,12 @@
   public String toString() {
     final StringBuilder sb =
       new StringBuilder("RoleInstance{");
-    sb.append("container=").append(SliderUtils.containerToString(container));
+    sb.append("role='").append(role).append('\'');
     sb.append(", id='").append(id).append('\'');
+    sb.append(", container=").append(SliderUtils.containerToString(container));
     sb.append(", createTime=").append(createTime);
     sb.append(", startTime=").append(startTime);
     sb.append(", released=").append(released);
-    sb.append(", role='").append(role).append('\'');
     sb.append(", roleId=").append(roleId);
     sb.append(", host=").append(host);
     sb.append(", hostURL=").append(hostURL);
@@ -188,8 +193,39 @@
   @Override
   public Object clone() throws CloneNotSupportedException {
     RoleInstance cloned = (RoleInstance) super.clone();
+    // clone the endpoint list, but not the values
+    cloned.endpoints = new ArrayList<RegisteredEndpoint>(this.endpoints);
     return cloned;
   }
 
+  /**
+   * Get the list of endpoints. 
+   * @return the endpoint list.
+   */
+  public List<RegisteredEndpoint> getEndpoints() {
+    return endpoints;
+  }
 
+  /**
+   * Add an endpoint registration
+   * @param endpoint
+   */
+  public void addEndpoint(RegisteredEndpoint endpoint) {
+    Preconditions.checkArgument(endpoint != null);
+    endpoints.add(endpoint);
+  }
+
+  /**
+   * Register a port endpoint as an inet-addr formatted endpoint, using the
+   * hostname as the first part of the address
+   * @param port
+   * @param protocol
+   * @param text
+   */
+  public void registerPortEndpoint(int port, String protocol, String text) {
+    InetSocketAddress addr = new InetSocketAddress(host, port);
+    RegisteredEndpoint epr = new RegisteredEndpoint(addr, protocol, text);
+    addEndpoint(epr);
+  }
+  
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 04d8b37..df4ab8e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -45,7 +45,7 @@
   private final ProviderRole providerRole;
 
   private int desired, actual, requested, releasing;
-  private int failed, started, startFailed, completed, totalRequested;
+  private volatile int failed, started, startFailed, completed, totalRequested;
 
   private String failureMessage = "";
 
@@ -143,16 +143,34 @@
   }
 
   /**
+   * Reset the failure counts
+   * @return the total number of failures up to this point
+   */
+  public int resetFailed() {
+    int total = failed + startFailed;
+    failed = 0;
+    startFailed = 0;
+    return total;
+  }
+
+  /**
    * Note that a role failed, text will
    * be used in any diagnostics if an exception
    * is later raised.
+   * @param startupFailure flag to indicate this was a startup event
+   * @return the number of failures
    * @param text text about the failure
    */
-  public void noteFailed(String text) {
-    failed++;
+  public int noteFailed(boolean startupFailure, String text) {
+    int current = ++failed;
     if (text != null) {
       failureMessage = text;
     }
+    //have a look to see if it short lived
+    if (startupFailure) {
+      incStartFailed();
+    }
+    return current;
   }
 
   public int getStartFailed() {
@@ -175,8 +193,8 @@
     this.completed = completed;
   }
 
-  public void incCompleted() {
-    completed ++;
+  public int incCompleted() {
+    return completed ++;
   }
   public int getStarted() {
     return started;
@@ -246,7 +264,7 @@
    * @return a map for use in statistics reports
    */
   public Map<String, Integer> buildStatistics() {
-    Map<String, Integer> stats = new HashMap<>();
+    Map<String, Integer> stats = new HashMap<String, Integer>();
     stats.put(StatusKeys.STATISTICS_CONTAINERS_ACTIVE_REQUESTS, getRequested());
     stats.put(StatusKeys.STATISTICS_CONTAINERS_COMPLETED, getCompleted());
     stats.put(StatusKeys.STATISTICS_CONTAINERS_DESIRED, getDesired());
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
similarity index 73%
copy from slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
copy to slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
index e3e595f..b7f0e05 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractRMOperation.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/SimpleReleaseSelector.java
@@ -18,14 +18,17 @@
 
 package org.apache.slider.server.appmaster.state;
 
-public class AbstractRMOperation {
+import java.util.List;
 
-  /**
-   * Execute the operation
-   * @param asyncRMClient client
-   */
-  public void execute(RMOperationHandler handler) {
+/**
+ * Simplest release selector simply returns the list
+ */
+public class SimpleReleaseSelector implements ContainerReleaseSelector {
 
+  @Override
+  public List<RoleInstance> sortCandidates(int roleId,
+      List<RoleInstance> candidates,
+      int minimumToSelect) {
+    return candidates;
   }
-  
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
index acba8cc..1714f75 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
@@ -144,20 +144,27 @@
    * @return the active containers at the time
    * the call was made
    */
-  List<RoleInstance> cloneActiveContainerList();
+  List<RoleInstance> cloneOwnedContainerList();
 
   /**
    * Get the number of active containers
    * @return the number of active containers the time the call was made
    */
-  int getNumActiveContainers();
+  int getNumOwnedContainers();
 
   /**
    * Get any active container with the given ID
    * @param id container Id
    * @return the active container or null if it is not found
    */
-  RoleInstance getActiveContainer(ContainerId id);
+  RoleInstance getOwnedContainer(ContainerId id);
+
+  /**
+   * Get any active container with the given ID
+   * @param id container Id
+   * @return the active container or null if it is not found
+   */
+  RoleInstance getOwnedContainer(String id) throws NoSuchNodeException;
 
   /**
    * Create a clone of the list of live cluster nodes.
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
index 4f290af..9192efe 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
@@ -96,7 +96,7 @@
     String regex = "(?!/ws)";
     serveRegex(regex).with(SliderDefaultWrapperServlet.class);
 
-    Map<String, String> params = new HashMap<>();
+    Map<String, String> params = new HashMap<String, String>();
     params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
     params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
     params.put(ResourceConfig.FEATURE_XMLROOTELEMENT_PROCESSING, "true");
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
index 606c05d..5fffa4a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmFilterInitializer.java
@@ -43,7 +43,7 @@
   @Override
   public void initFilter(FilterContainer container, Configuration conf) {
     configuration = conf;
-    Map<String, String> params = new HashMap<>();
+    Map<String, String> params = new HashMap<String, String>();
     String proxy = WebAppUtils.getProxyHostAndPort(conf);
     String[] parts = proxy.split(":");
     params.put(SliderAmIpFilter.PROXY_HOST, parts[0]);
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
index ad5e219..4c66876 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAmIpFilter.java
@@ -70,7 +70,7 @@
     synchronized(this) {
       if(proxyAddresses == null || (lastUpdate + updateInterval) >= now) {
         try {
-          proxyAddresses = new HashSet<>();
+          proxyAddresses = new HashSet<String>();
           for(InetAddress add : InetAddress.getAllByName(proxyHost)) {
             if (log.isDebugEnabled()) {
               log.debug("proxy address is: " + add.getHostAddress());
@@ -133,7 +133,11 @@
             principal);
         chain.doFilter(requestWrapper, resp);
       }
-    } catch (IOException | ServletException e) {
+// JKD7    } catch (IOException | ServletException e) {
+    } catch (IOException e) {
+      log.warn("When fetching {}: {}", requestURI, e);
+      throw e;
+    } catch (ServletException e) {
       log.warn("When fetching {}: {}", requestURI, e);
       throw e;
     }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
index 4eebd45..4d595a9 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
@@ -126,7 +126,7 @@
    */
   private TreeMap<String, RoleStatus> getRoleStatusesByName(Map<Integer, ProviderRole> rolesById,
       Map<Integer, RoleStatus> statusById) {
-    TreeMap<String, RoleStatus> statusByName = new TreeMap<>();
+    TreeMap<String, RoleStatus> statusByName = new TreeMap<String, RoleStatus>();
     for (Entry<Integer, ProviderRole> role : rolesById.entrySet()) {
       final RoleStatus status = statusById.get(role.getKey());
 
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
index d3388f5..a08d46d 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeat.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.server.appmaster.web.rest.agent;
 
+import org.apache.slider.providers.agent.State;
 import org.codehaus.jackson.annotate.JsonIgnoreProperties;
 import org.codehaus.jackson.annotate.JsonProperty;
 import org.codehaus.jackson.map.annotate.JsonSerialize;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
index ca2db32..0545499 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/HeartBeatResponse.java
@@ -40,6 +40,7 @@
   RegistrationCommand registrationCommand;
 
   boolean restartAgent = false;
+  boolean restartEnabled = true;
   boolean hasMappedComponents = false;
 
   @JsonProperty("responseId")
@@ -92,6 +93,16 @@
     this.restartAgent = restartAgent;
   }
 
+  @JsonProperty("restartEnabled")
+  public boolean getRstartEnabled() {
+    return restartEnabled;
+  }
+
+  @JsonProperty("restartEnabled")
+  public void setRestartEnabled(boolean restartEnabled) {
+    this.restartEnabled = restartEnabled;
+  }
+
   @JsonProperty("hasMappedComponents")
   public boolean hasMappedComponents() {
     return hasMappedComponents;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
index 9299a16..a44c3a4 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/agent/Register.java
@@ -16,15 +16,14 @@
  */
 package org.apache.slider.server.appmaster.web.rest.agent;
 
+import org.apache.slider.providers.agent.State;
 import org.codehaus.jackson.annotate.JsonIgnoreProperties;
 import org.codehaus.jackson.annotate.JsonProperty;
 import org.codehaus.jackson.map.annotate.JsonSerialize;
 
-/**
- *
- * Data model for agent to send heartbeat to ambari and/or app master.
- *
- */
+import java.util.Map;
+
+/** Data model for agent to send heartbeat to ambari and/or app master. */
 @JsonIgnoreProperties(ignoreUnknown = true)
 @JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
 public class Register {
@@ -36,6 +35,9 @@
   private String publicHostname;
   private AgentEnv agentEnv;
   private String agentVersion;
+  private State actualState;
+  private State expectedState;
+  private Map<String, String> allocatedPorts;
 
   @JsonProperty("responseId")
   public int getResponseId() {
@@ -44,13 +46,17 @@
 
   @JsonProperty("responseId")
   public void setResponseId(int responseId) {
-    this.responseId=responseId;
+    this.responseId = responseId;
   }
 
   public long getTimestamp() {
     return timestamp;
   }
 
+  public void setTimestamp(long timestamp) {
+    this.timestamp = timestamp;
+  }
+
   public String getHostname() {
     return hostname;
   }
@@ -67,10 +73,6 @@
     this.hardwareProfile = hardwareProfile;
   }
 
-  public void setTimestamp(long timestamp) {
-    this.timestamp = timestamp;
-  }
-
   public String getPublicHostname() {
     return publicHostname;
   }
@@ -103,15 +105,45 @@
     this.currentPingPort = currentPingPort;
   }
 
+  public State getActualState() {
+    return actualState;
+  }
+
+  public void setActualState(State actualState) {
+    this.actualState = actualState;
+  }
+
+  public State getExpectedState() {
+    return expectedState;
+  }
+
+  public void setExpectedState(State expectedState) {
+    this.expectedState = expectedState;
+  }
+
+  /** @return the allocated ports, or <code>null</code> if none are present */
+  @JsonProperty("allocatedPorts")
+  public Map<String, String> getAllocatedPorts() {
+    return allocatedPorts;
+  }
+
+  /** @param ports allocated ports */
+  @JsonProperty("allocatedPorts")
+  public void setAllocatedPorts(Map<String, String> ports) {
+    this.allocatedPorts = ports;
+  }
+
   @Override
   public String toString() {
     String ret = "responseId=" + responseId + "\n" +
                  "timestamp=" + timestamp + "\n" +
-                 "hostname="  + hostname + "\n" +
-                 "currentPingPort=" + currentPingPort + "\n";
+                 "hostname=" + hostname + "\n" +
+                 "expectedState=" + expectedState + "\n" +
+                 "actualState=" + actualState + "\n";
 
-    if (hardwareProfile != null)
+    if (hardwareProfile != null) {
       ret = ret + "hardwareprofile=" + this.hardwareProfile.toString();
+    }
     return ret;
   }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
index a439d9b..5d8b657 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/publisher/PublisherResource.java
@@ -110,7 +110,7 @@
   @Produces({MediaType.APPLICATION_JSON})
   public Set<URL> getAMClassPath() {
     URL[] urls = ((URLClassLoader) getClass().getClassLoader()).getURLs();
-    return new LinkedHashSet<>(Arrays.asList(urls));
+    return new LinkedHashSet<URL>(Arrays.asList(urls));
   }
 
   @GET
@@ -231,7 +231,7 @@
           propertyName, config);
       throw new NotFoundException("Property not found: " + propertyName);
     }
-    Map<String,String> rtnVal = new HashMap<>();
+    Map<String, String> rtnVal = new HashMap<String, String>();
     rtnVal.put(propertyName, propVal);
 
     return rtnVal;
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
index 5645e0e..95f0417 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/ContainerStatsBlock.java
@@ -96,7 +96,7 @@
   protected void render(Block html) {
     // TODO Probably better to just get a copy of this list for us to avoid the repeated synchronization?
     // does this change if we have 50 node, 100node, 500 node clusters?
-    final Map<String,RoleInstance> containerInstances = getContainerInstances(slider.getAppState().cloneActiveContainerList());
+    final Map<String,RoleInstance> containerInstances = getContainerInstances(slider.getAppState().cloneOwnedContainerList());
 
     for (Entry<String,RoleStatus> entry : slider.getRoleStatusByName().entrySet()) {
       final String name = entry.getKey();
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
index 90a3ee7..54bdb09 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/view/IndexBlock.java
@@ -30,7 +30,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.net.URL;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -63,7 +62,7 @@
 
     UL<DIV<Hamlet>> ul = div.ul();
 
-    ul.li("Total number of containers for cluster: " + appState.getNumActiveContainers());
+    ul.li("Total number of containers for cluster: " + appState.getNumOwnedContainers());
     ul.li("Cluster created: " + getInfoAvoidingNulls(StatusKeys.INFO_CREATE_TIME_HUMAN));
     ul.li("Cluster last flexed: " + getInfoAvoidingNulls(StatusKeys.INFO_FLEX_TIME_HUMAN));
     ul.li("Cluster running since: " + getInfoAvoidingNulls(StatusKeys.INFO_LIVE_TIME_HUMAN));
diff --git a/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java b/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
index 2120be5..422ffeb 100644
--- a/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
+++ b/slider-core/src/main/java/org/apache/slider/server/avro/RoleHistoryWriter.java
@@ -90,7 +90,7 @@
     throws IOException {
     try {
       DatumWriter<RoleHistoryRecord> writer =
-        new SpecificDatumWriter<>(RoleHistoryRecord.class);
+        new SpecificDatumWriter<RoleHistoryRecord>(RoleHistoryRecord.class);
 
       int roles = history.getRoleSize();
       RoleHistoryHeader header = new RoleHistoryHeader();
@@ -184,7 +184,7 @@
                                                        BadConfigException {
     try {
       DatumReader<RoleHistoryRecord> reader =
-        new SpecificDatumReader<>(RoleHistoryRecord.class);
+        new SpecificDatumReader<RoleHistoryRecord>(RoleHistoryRecord.class);
       Decoder decoder =
         DecoderFactory.get().jsonDecoder(RoleHistoryRecord.getClassSchema(),
                                          in);
@@ -336,7 +336,7 @@
     
     PathFilter filter = new GlobFilter(SliderKeys.HISTORY_FILENAME_GLOB_PATTERN);
     FileStatus[] stats = fs.listStatus(dir, filter);
-    List<Path> paths = new ArrayList<>(stats.length);
+    List<Path> paths = new ArrayList<Path>(stats.length);
     for (FileStatus stat : stats) {
       log.debug("Possible entry: {}", stat.toString());
       if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
diff --git a/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java b/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
index 3b2c6d7..a4447e3 100644
--- a/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/server/servicemonitor/MonitorUtils.java
@@ -50,7 +50,7 @@
    * @return a list view with no empty strings
    */
   public static List<String> prepareArgs(String[] args) {
-    List<String> argsList = new ArrayList<>(args.length);
+    List<String> argsList = new ArrayList<String>(args.length);
     StringBuilder argsStr = new StringBuilder("Arguments: [");
     for (String arg : args) {
       argsStr.append('"').append(arg).append("\" ");
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java b/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
index 14b78a7..b3e2ff2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/curator/RegistryBinderService.java
@@ -54,10 +54,10 @@
   private final ServiceDiscovery<Payload> discovery;
 
   private final Map<String, ServiceInstance<Payload>> entries =
-    new HashMap<>();
+    new HashMap<String, ServiceInstance<Payload>>();
 
   private JsonSerDeser<CuratorServiceInstance<Payload>> deser =
-    new JsonSerDeser<>(CuratorServiceInstance.class);
+    new JsonSerDeser<CuratorServiceInstance<Payload>>(CuratorServiceInstance.class);
 
   /**
    * Create an instance
@@ -221,7 +221,7 @@
     try {
       List<String> instanceIDs = instanceIDs(servicetype);
       List<CuratorServiceInstance<Payload>> instances =
-        new ArrayList<>(instanceIDs.size());
+        new ArrayList<CuratorServiceInstance<Payload>>(instanceIDs.size());
       for (String instanceID : instanceIDs) {
         CuratorServiceInstance<Payload> instance =
           queryForInstance(servicetype, instanceID);
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java b/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
index 4938adf..e4e8523 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/registry/RegistryRestResources.java
@@ -100,7 +100,8 @@
     try {
       List<CuratorServiceInstance<ServiceInstanceData>>
           instances = registry.listInstances(name);
-      return Response.ok(new CuratorServiceInstances<>(instances)).build();
+      return Response.ok(
+      new CuratorServiceInstances<ServiceInstanceData>(instances)).build();
     } catch (Exception e) {
       log.error("Error during generation of response", e);
       return Response.serverError().build();
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java b/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
index 1904df5..ca4d180 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/registry/SliderRegistryService.java
@@ -53,7 +53,7 @@
       IOException {
     List<CuratorServiceInstance<ServiceInstanceData>> services =
         listInstances(serviceType);
-    List<ServiceInstanceData> payloads = new ArrayList<>(services.size());
+    List<ServiceInstanceData> payloads = new ArrayList<ServiceInstanceData>(services.size());
     for (CuratorServiceInstance<ServiceInstanceData> instance : services) {
       payloads.add(instance.payload);
     }
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java b/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
index 5238d90..56ee199 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/security/SecurityUtils.java
@@ -18,6 +18,10 @@
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.RandomStringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.core.conf.MapOperations;
 import org.slf4j.Logger;
@@ -25,11 +29,11 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.attribute.PosixFilePermission;
-import java.nio.file.attribute.PosixFilePermissions;
+//import java.nio.file.Files;
+//import java.nio.file.Path;
+//import java.nio.file.Paths;
+//import java.nio.file.attribute.PosixFilePermission;
+//import java.nio.file.attribute.PosixFilePermissions;
 import java.util.Set;
 
 /**
@@ -89,8 +93,11 @@
   }
 
   public static String getOpenSslCommandResult(String command, int exitCode) {
-    return new StringBuilder().append("Command ").append(hideOpenSslPassword(command)).append(" was finished with exit code: ")
-        .append(exitCode).append(" - ").append(getOpenSslExitCodeDescription(exitCode)).toString();
+    return new StringBuilder().append("Command ")
+        .append(hideOpenSslPassword(command))
+        .append(" was finished with exit code: ")
+        .append(exitCode).append(" - ")
+        .append(getOpenSslExitCodeDescription(exitCode)).toString();
   }
 
   private static String getOpenSslExitCodeDescription(int exitCode) {
@@ -141,15 +148,18 @@
       File newCertsDir = new File(dbDir, "newcerts");
       newCertsDir.mkdirs();
       try {
-        Set<PosixFilePermission> perms =
-            PosixFilePermissions.fromString("rwx------");
-        Files.setPosixFilePermissions(Paths.get(secDirFile.toURI()), perms);
-        Files.setPosixFilePermissions(Paths.get(dbDir.toURI()), perms);
-        Files.setPosixFilePermissions(Paths.get(newCertsDir.toURI()), perms);
+        RawLocalFileSystem fileSystem = new RawLocalFileSystem();
+        FsPermission permissions = new FsPermission(FsAction.ALL, FsAction.NONE,
+                                                    FsAction.NONE);
+        fileSystem.setPermission(new Path(dbDir.getAbsolutePath()),
+                                 permissions);
+        fileSystem.setPermission(new Path(dbDir.getAbsolutePath()), permissions);
+        fileSystem.setPermission(new Path(newCertsDir.getAbsolutePath()),
+                                 permissions);
         File indexFile = new File(dbDir, "index.txt");
         indexFile.createNewFile();
 
-        SecurityUtils.writeCaConfigFile(secDirFile.getAbsolutePath());
+        SecurityUtils.writeCaConfigFile(secDirFile.getAbsolutePath().replace('\\', '/'));
 
       } catch (IOException e) {
         LOG.error("Unable to create SSL configuration directories/files", e);
@@ -191,15 +201,19 @@
   }
 
   private static String getDefaultKeystoreLocation() {
-    Path workDir = null;
+    File workDir = null;
     try {
-      workDir = Files.createTempDirectory("sec");
+      workDir =  new File(FileUtils.getTempDirectory().getAbsolutePath()
+                          + "/sec" + System.currentTimeMillis());
+      if (!workDir.mkdirs()) {
+        throw new IOException("Unable to create temporary security directory");
+      }
     } catch (IOException e) {
       LOG.warn("Unable to create security directory");
       return null;
     }
 
-    return new StringBuilder().append(workDir.toAbsolutePath())
+    return new StringBuilder().append(workDir.getAbsolutePath())
         .append(File.separator)
         .append(SliderKeys.SECURITY_DIR)
         .append(File.separator)
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
index ccce6cb..46c724c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/ForkedProcessService.java
@@ -27,6 +27,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -65,8 +66,9 @@
  * it calls its {@link #stop()} method. If the error code was non-zero, 
  * the service is logged as having failed.
  */
-public class ForkedProcessService extends AbstractWorkflowExecutorService implements
-    LongLivedProcessLifecycleEvent, Runnable {
+public class ForkedProcessService
+    extends WorkflowExecutorService<ExecutorService>
+    implements LongLivedProcessLifecycleEvent, Runnable {
 
   /**
    * Log for the forked master process
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
index ecc26b9..c8ff758 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/LongLivedProcess.java
@@ -112,7 +112,7 @@
     this.processLog = processLog;
     ServiceThreadFactory factory = new ServiceThreadFactory(name, true);
     processExecutor = Executors.newSingleThreadExecutor(factory);
-    logExecutor=    Executors.newSingleThreadExecutor(factory);
+    logExecutor = Executors.newSingleThreadExecutor(factory);
     processBuilder = new ProcessBuilder(commands);
     processBuilder.redirectErrorStream(false);
   }
@@ -288,6 +288,7 @@
    */
   @Override // Runnable
   public void run() {
+    Preconditions.checkNotNull(process, "null process");
     LOG.debug("Lifecycle callback thread running");
     //notify the callback that the process has started
     if (lifecycleCallback != null) {
@@ -326,10 +327,10 @@
   public void start() throws IOException {
 
     spawnChildProcess();
-    processExecutor.submit(this);
     processStreamReader =
-      new ProcessStreamReader(processLog, STREAM_READER_SLEEP_TIME);
+        new ProcessStreamReader(processLog, STREAM_READER_SLEEP_TIME);
     logExecutor.submit(processStreamReader);
+    processExecutor.submit(this);
   }
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
index 6c50798..fae3a38 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowCallbackService.java
@@ -36,7 +36,7 @@
  * started in this service's <code>start()</code> operation.
  */
 public class WorkflowCallbackService<V> extends
-    AbstractWorkflowExecutorService {
+    WorkflowScheduledExecutorService<ScheduledExecutorService> {
   protected static final Logger LOG =
     LoggerFactory.getLogger(WorkflowCallbackService.class);
   private final Callable<V> callback;
@@ -48,7 +48,6 @@
   private Callable<V> callable;
   private ScheduledFuture<V> scheduledFuture;
 
-
   /**
    * Create an instance of the service
    * @param name service name
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
similarity index 83%
rename from slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java
rename to slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
index c26e3c4..7409d32 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/AbstractWorkflowExecutorService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowExecutorService.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.server.services.workflow;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.service.AbstractService;
 
 import java.util.concurrent.Callable;
@@ -28,16 +29,16 @@
  * A service that hosts an executor -when the service is stopped,
  * {@link ExecutorService#shutdownNow()} is invoked.
  */
-public abstract class AbstractWorkflowExecutorService extends AbstractService {
+public class WorkflowExecutorService<E extends ExecutorService> extends AbstractService {
 
-  private ExecutorService executor;
+  private E executor;
 
   /**
    * Construct an instance with the given name -but
    * no executor
    * @param name service name
    */
-  public AbstractWorkflowExecutorService(String name) {
+  public WorkflowExecutorService(String name) {
     this(name, null);
   }
 
@@ -46,8 +47,8 @@
    * @param name service name
    * @param executor exectuor
    */
-  protected AbstractWorkflowExecutorService(String name,
-      ExecutorService executor) {
+  public WorkflowExecutorService(String name,
+      E executor) {
     super(name);
     this.executor = executor;
   }
@@ -56,16 +57,17 @@
    * Get the executor
    * @return the executor
    */
-  public synchronized ExecutorService getExecutor() {
+  public synchronized E getExecutor() {
     return executor;
   }
 
   /**
-   * Set the executor. This is protected as it
-   * is intended to be restricted to subclasses
+   * Set the executor. Only valid if the current one is null
    * @param executor executor
    */
-  protected synchronized void setExecutor(ExecutorService executor) {
+  public synchronized void setExecutor(E executor) {
+    Preconditions.checkState(this.executor == null,
+        "Executor already set");
     this.executor = executor;
   }
 
@@ -87,14 +89,15 @@
   public <V> Future<V> submit(Callable<V> callable) {
     return getExecutor().submit(callable);
   }
+
   /**
    * Stop the service: halt the executor. 
    * @throws Exception exception.
    */
   @Override
   protected void serviceStop() throws Exception {
-    super.serviceStop();
     stopExecutor();
+    super.serviceStop();
   }
 
   /**
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
similarity index 61%
rename from slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java
rename to slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
index 287035f..e9f53ed 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMViewForProviders.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowScheduledExecutorService.java
@@ -16,12 +16,23 @@
  * limitations under the License.
  */
 
-package org.apache.slider.server.appmaster;
+package org.apache.slider.server.services.workflow;
 
-import org.apache.slider.core.exceptions.SliderException;
+import java.util.concurrent.ScheduledExecutorService;
 
-/** Operations available to a provider from AppMaster */
-public interface AMViewForProviders {
-  /** Provider can ask AppMaster to release a specific container */
-  void refreshContainer(String containerId, boolean newHostIfPossible) throws SliderException;
+/**
+ * Scheduled executor or subclass thereof
+ * @param <E> scheduled executor service type
+ */
+public class WorkflowScheduledExecutorService<E extends ScheduledExecutorService>
+    extends WorkflowExecutorService<E> {
+
+  public WorkflowScheduledExecutorService(String name) {
+    super(name);
+  }
+
+  public WorkflowScheduledExecutorService(String name,
+      E executor) {
+    super(name, executor);
+  }
 }
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
index fab1b9f..36d059a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/package-info.java
@@ -154,7 +154,7 @@
  then signals its owning service to stop once that runnable is completed. 
  Any exception raised in the run is stored.
  </li>
- <li>{@link org.apache.slider.server.services.workflow.AbstractWorkflowExecutorService}:
+ <li>{@link org.apache.slider.server.services.workflow.WorkflowExecutorService}:
  A base class for services that wish to have a {@link java.util.concurrent.ExecutorService}
  with a lifespan mapped to that of a service. When the service is stopped, the
  {@link java.util.concurrent.ExecutorService#shutdownNow()} method is called to
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
index 8a4e5d8..74f7a3f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/AgentMiniClusterTestBase.groovy
@@ -133,13 +133,12 @@
  * @param blockUntilRunning block until the AM is running
  * @return launcher which will have executed the command.
  */
-  public ServiceLauncher<SliderClient> createMasterlessAM(
+  public ServiceLauncher<SliderClient> createStandaloneAM(
       String clustername,
-      int size,
       boolean deleteExistingData,
       boolean blockUntilRunning) {
     List<String> args = [];
-    return createMasterlessAMWithArgs(
+    return createStandaloneAMWithArgs(
         clustername,
         args,
         deleteExistingData,
@@ -155,7 +154,7 @@
  * @param blockUntilRunning block until the AM is running
  * @return launcher which will have executed the command.
  */
-  public ServiceLauncher<SliderClient> createMasterlessAMWithArgs(
+  public ServiceLauncher<SliderClient> createStandaloneAMWithArgs(
       String clustername,
       List<String> extraArgs,
       boolean deleteExistingData,
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
index cb05fd0..9bfeb8c 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionExists.groovy
@@ -29,8 +29,6 @@
 import org.apache.slider.core.exceptions.UnknownApplicationInstanceException
 import org.apache.slider.core.main.LauncherExitCodes
 import org.apache.slider.core.main.ServiceLauncher
-import org.apache.slider.test.SliderTestUtils
-import org.junit.Assert
 import org.junit.Before
 import org.junit.Test
 
@@ -45,7 +43,7 @@
   @Before
   public void setup() {
     super.setup()
-    createMiniCluster("TestActionExists", configuration, 1, false)
+    createMiniCluster("", configuration, 1, false)
   }
   
   @Test
@@ -72,7 +70,10 @@
   public void testExistsLiveCluster() throws Throwable {
     //launch the cluster
     String clustername = createClusterName()
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername,
+        true,
+        false)
     SliderClient sliderClient = launcher.service
     addToTeardown(launcher)
     ApplicationReport report = waitForClusterLive(sliderClient)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
index 66fdc15..b7196ac 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionList.groovy
@@ -40,7 +40,7 @@
   @Before
   public void setup() {
     super.setup()
-    createMiniCluster("test_action_list", configuration, 1, false)
+    createMiniCluster("", configuration, 1, false)
   }
 
   /**
@@ -88,7 +88,10 @@
   public void testListLiveCluster() throws Throwable {
     //launch the cluster
     String clustername = createClusterName()
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername,
+        true,
+        false)
     addToTeardown(launcher)
     //do the low level operations to get a better view of what is going on 
     SliderClient sliderClient = launcher.service
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
index bdee39f..bae8cea 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/actions/TestActionStatus.groovy
@@ -30,10 +30,8 @@
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.client.SliderClient
 import org.apache.slider.common.params.ActionStatusArgs
-import org.apache.slider.common.params.ClientArgs
 import org.apache.hadoop.yarn.api.records.ApplicationReport
 import org.apache.hadoop.yarn.conf.YarnConfiguration
-import org.apache.slider.core.main.LauncherExitCodes
 import org.apache.slider.core.main.ServiceLauncher
 import org.junit.Before
 import org.junit.Test
@@ -48,7 +46,7 @@
   @Before
   public void setup() {
     super.setup()
-    createMiniCluster("test_action_status", configuration, 1, false)
+    createMiniCluster("", configuration, 1, false)
   }
 
   /**
@@ -71,7 +69,7 @@
           new YarnConfiguration(miniCluster.config),
           [
               SliderActions.ACTION_STATUS,
-              "test_status_missing_cluster",
+              "teststatusmissingcluster",
               Arguments.ARG_MANAGER, RMAddr
           ]
       )
@@ -84,10 +82,13 @@
   
   public void testStatusLiveCluster() throws Throwable {
     describe("create a live cluster then exec the status command")
-    String clustername = "test_status_live_cluster"
+    String clustername = "teststatuslivecluster"
     
     //launch the cluster
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, false)
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername,
+        true,
+        false)
 
     SliderClient sliderClient = launcher.service
     ApplicationReport report = waitForClusterLive(sliderClient)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
index 9d41b8c..f5eff25 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeCommands.groovy
@@ -42,15 +42,14 @@
 
   @Test
   public void testFreezeCommands() throws Throwable {
-    String clustername = "test_freeze_commands"
     YarnConfiguration conf = configuration
-    createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+    String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
 
     describe "create a masterless AM, freeze it, try to freeze again"
 
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(
-        clustername,
-        0,
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername
+        ,
         true,
         true);
     addToTeardown(launcher.service);
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
index e833423..04be7c0 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeThawMasterlessAM.groovy
@@ -22,6 +22,7 @@
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.fs.FileSystem as HadoopFS
 import org.apache.hadoop.fs.Path
+import org.apache.hadoop.yarn.api.records.ApplicationReport
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.slider.agent.AgentMiniClusterTestBase
 import org.apache.slider.client.SliderClient
@@ -48,9 +49,8 @@
 
   @Test
   public void testFreezeThawMasterlessAM() throws Throwable {
-    String clustername = "test_freeze_thaw_masterless_am"
     YarnConfiguration conf = configuration
-    createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+    String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
     
     describe "create a masterless AM, freeze it, thaw it"
     //copy the confdir somewhere
@@ -59,7 +59,10 @@
     SliderUtils.copyDirectory(conf, resConfPath, tempConfPath, null)
 
 
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, true)
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername,
+        true,
+        true)
     SliderClient sliderClient = launcher.service
     addToTeardown(sliderClient);
 
@@ -74,8 +77,10 @@
     //now start the cluster
     ServiceLauncher launcher2 = thawCluster(clustername, [], true);
     SliderClient newCluster = launcher2.service
+    addToTeardown(newCluster);
+
+//    ApplicationReport report = waitForClusterLive(newCluster)
     newCluster.getClusterDescription(clustername);
-    
     //freeze
     assert 0 == clusterActionFreeze(sliderClient, clustername)
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
index ebec43b..b81bc77 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestFreezeUnknownCluster.groovy
@@ -39,9 +39,8 @@
 
   @Test
   public void testFreezeUnknownCluster() throws Throwable {
-    String clustername = "test_start_unknown_cluster"
     YarnConfiguration conf = configuration
-    createMiniCluster(clustername, conf, 1, true)
+    String clustername = createMiniCluster("", conf, 1, true)
 
     describe "try to freeze a cluster that isn't defined"
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
index 4c90445..b49032d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/freezethaw/TestThawUnknownCluster.groovy
@@ -37,8 +37,7 @@
 
   @Test
   public void testThawUnknownCluster() throws Throwable {
-    String clustername = "test_thaw_unknown_cluster"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "try to start a cluster that isn't defined"
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
index 160420b..b253c17 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadAMHeap.groovy
@@ -39,14 +39,13 @@
 
   @Test
   public void testBadAMHeap() throws Throwable {
-    String clustername = "test_bad_am_heap"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that bad Java heap options are picked up"
 
     try {
       ServiceLauncher<SliderClient> launcher =
-          createMasterlessAMWithArgs(clustername,
+          createStandaloneAMWithArgs(clustername,
               [
                   Arguments.ARG_COMP_OPT,
                   SliderKeys.COMPONENT_AM,
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
index 5509314..13d5398 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBadYarnQueue.groovy
@@ -44,14 +44,13 @@
   @Test
   public void testBadYarnQueue() throws Throwable {
     skip("untestable in minicluster")
-    String clustername = "test_bad_yarn_queue"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that a bad yarn queue fails the launch"
 
     try {
       ServiceLauncher<SliderClient> launcher =
-          createMasterlessAMWithArgs(clustername,
+          createStandaloneAMWithArgs(clustername,
               [
                   Arguments.ARG_DEFINE,
                   SliderXmlConfKeys.KEY_YARN_QUEUE + "=noqueue",
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
index 2933c90..0665a9a 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestBuildStandaloneAM.groovy
@@ -22,6 +22,8 @@
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.ApplicationReport
 import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.api.ClusterDescription
+import org.apache.slider.api.ResourceKeys
 import org.apache.slider.client.SliderClient
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.SliderActions
@@ -30,6 +32,11 @@
 import org.apache.slider.core.main.ServiceLauncher
 import org.junit.Test
 
+import static org.apache.slider.common.params.Arguments.ARG_COMP_OPT
+import static org.apache.slider.common.params.Arguments.ARG_RESOURCE_OPT
+import static org.apache.slider.common.params.Arguments.ARG_RES_COMP_OPT
+import static org.apache.slider.providers.agent.AgentKeys.SERVICE_NAME
+
 @CompileStatic
 @Slf4j
 
@@ -37,8 +44,7 @@
 
   @Test
   public void testBuildCluster() throws Throwable {
-    String clustername = "test_build_cluster"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that a build cluster is created but not started"
 
@@ -46,6 +52,67 @@
         SliderActions.ACTION_BUILD,
         clustername,
         [:],
+        [ARG_RESOURCE_OPT, "yarn.container.failure.window.years", "4"],
+        true,
+        false,
+        agentDefOptions)
+    SliderClient sliderClient = launcher.service
+    addToTeardown(sliderClient);
+
+    //verify that exists(live) is now false
+    assert LauncherExitCodes.EXIT_FALSE ==
+           sliderClient.actionExists(clustername, true)
+
+    //but the cluster is still there for the default
+    assert 0 == sliderClient.actionExists(clustername, false)
+    
+    
+    
+    // verify the YARN registry doesn't know of it
+    def serviceRegistryClient = sliderClient.YARNRegistryClient
+    ApplicationReport report = serviceRegistryClient.findInstance(clustername)
+    assert report == null;
+    
+    // verify that global resource options propagate from the CLI
+    def aggregateConf = sliderClient.loadPersistedClusterDescription(clustername)
+    def windowDays = aggregateConf.resourceOperations.globalOptions.getMandatoryOptionInt(
+        "yarn.container.failure.window.years")
+    assert 4 == windowDays
+
+    //and a second attempt will fail as the cluster now exists
+    try {
+      ServiceLauncher<SliderClient> cluster2 = createOrBuildCluster(
+          SliderActions.ACTION_BUILD,
+          clustername,
+          [:],
+          [],
+          false,
+          false,
+          agentDefOptions)
+      fail("expected an exception, got $cluster2.service")
+    } catch (SliderException e) {
+      assertExceptionDetails(e, SliderExitCodes.EXIT_INSTANCE_EXISTS, "")
+    }
+
+    
+    
+    //thaw time
+    ServiceLauncher<SliderClient> l2 = thawCluster(clustername, [], true)
+    SliderClient thawed = l2.service
+    addToTeardown(thawed);
+    waitForClusterLive(thawed)
+  }
+
+  @Test
+  public void testUpdateCluster() throws Throwable {
+    String clustername = createMiniCluster("", configuration, 1, true)
+
+    describe "verify that a built cluster can be updated"
+
+    ServiceLauncher<SliderClient> launcher = createOrBuildCluster(
+        SliderActions.ACTION_BUILD,
+        clustername,
+        [:],
         [],
         true,
         false,
@@ -64,26 +131,29 @@
     ApplicationReport report = serviceRegistryClient.findInstance(clustername)
     assert report == null;
 
-    //and a second attempt will fail as the cluster now exists
-    try {
-      ServiceLauncher<SliderClient> cluster2 = createOrBuildCluster(
-          SliderActions.ACTION_BUILD,
-          clustername,
-          [:],
-          [],
-          false,
-          false,
-          agentDefOptions)
-      fail("expected an exception, got $cluster2.service")
-    } catch (SliderException e) {
-      assertExceptionDetails(e, SliderExitCodes.EXIT_INSTANCE_EXISTS, "")
-    }
+    def master = "hbase-master"
+    createOrBuildCluster(
+        SliderActions.ACTION_UPDATE,
+        clustername,
+        [(master): 1],
+        [
+            ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "2",
+            ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+        ],
+        true,
+        false,
+        agentDefOptions)
 
-    //thaw time
-    ServiceLauncher<SliderClient> l2 = thawCluster(clustername, [], true)
-    SliderClient thawed = l2.service
-    addToTeardown(thawed);
-    waitForClusterLive(thawed)
+    launcher = thawCluster(clustername, [], true);
+    sliderClient = launcher.service
+    addToTeardown(sliderClient);
+    waitForClusterLive(sliderClient)
+
+    dumpClusterStatus(sliderClient, "application status after update")
+
+    ClusterDescription cd = sliderClient.clusterDescription
+    Map<String, String> masterRole = cd.getRole(master)
+    assert masterRole != null, "Role hbase-master must exist"
+    assert cd.roleNames.contains(master), "Role names must contain hbase-master"
   }
-
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
similarity index 90%
rename from slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy
rename to slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
index 37fa0ec..17e4ff2 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestDestroyMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
@@ -39,14 +39,13 @@
 @CompileStatic
 @Slf4j
 
-class TestDestroyMasterlessAM extends AgentMiniClusterTestBase {
+class TestStandaloneAMDestroy extends AgentMiniClusterTestBase {
 
   @Test
-  public void testDestroyMasterlessAM() throws Throwable {
-    String clustername = "test_destroy_masterless_am"
-    createMiniCluster(clustername, configuration, 1, false)
+  public void testDestroyStandaloneAM() throws Throwable {
+    String clustername = createMiniCluster("", configuration, 1, false)
 
-    describe "create a masterless AM, stop it, try to create" +
+    describe "create a Standalone AM, stop it, try to create" +
              "a second cluster with the same name, destroy it, try a third time"
 
     ServiceLauncher<SliderClient> launcher1 = launchClientAgainstMiniMR(
@@ -58,9 +57,10 @@
         ])
     assert launcher1.serviceExitCode == 0
 
-
-
-    ServiceLauncher<SliderClient> launcher = createMasterlessAM(clustername, 0, true, true)
+    ServiceLauncher<SliderClient> launcher = createStandaloneAM(
+        clustername,
+        true,
+        true)
     SliderClient sliderClient = launcher.service
     addToTeardown(sliderClient);
 
@@ -82,7 +82,7 @@
     
     //now try to create instance #2, and expect an in-use failure
     try {
-      createMasterlessAM(clustername, 0, false, false)
+      createStandaloneAM(clustername, false, false)
       fail("expected a failure, got an AM")
     } catch (SliderException e) {
       assertExceptionDetails(e,
@@ -122,7 +122,7 @@
     describe "recreating $clustername"
 
     //and create a new cluster
-    launcher = createMasterlessAM(clustername, 0, false, true)
+    launcher = createStandaloneAM(clustername, false, true)
     SliderClient cluster2 = launcher.service
 
     // do an echo here of a large string
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
similarity index 86%
rename from slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy
rename to slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
index b4331af..75f9a2c 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestKillMasterlessAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMKill.groovy
@@ -36,17 +36,16 @@
 @CompileStatic
 @Slf4j
 
-class TestKillMasterlessAM extends AgentMiniClusterTestBase {
+class TestStandaloneAMKill extends AgentMiniClusterTestBase {
 
 
   @Test
-  public void testKillMasterlessAM() throws Throwable {
-    String clustername = "test_kill_masterless_am"
-    createMiniCluster(clustername, configuration, 1, true)
+  public void testKillStandaloneAM() throws Throwable {
+    String clustername = createMiniCluster("", configuration, 1, true)
 
-    describe "kill a masterless AM and verify that it shuts down"
+    describe "kill a Standalone AM and verify that it shuts down"
     ServiceLauncher<SliderClient> launcher =
-        createMasterlessAMWithArgs(clustername,
+        createStandaloneAMWithArgs(clustername,
           [
               Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, "1"
           ],
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
new file mode 100644
index 0000000..162bab0
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.slider.agent.standalone
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.SleepJob
+import org.apache.hadoop.yarn.api.records.ApplicationReport
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.api.InternalKeys
+import org.apache.slider.api.ResourceKeys
+import org.apache.slider.client.SliderClient
+import org.apache.slider.common.SliderXmlConfKeys
+import org.apache.slider.common.params.ActionAMSuicideArgs
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.core.exceptions.ErrorStrings
+import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Test
+
+/**
+ * Run a standalone AM with the chaos monkey configured to kill it, and
+ * verify the application finally fails once the AM restart limit is reached.
+ */
+@CompileStatic
+@Slf4j
+
+class TestStandaloneAMMonkeyRestart extends AgentMiniClusterTestBase {
+
+
+  @Test
+  public void testStandaloneAMMonkeyRestart() throws Throwable {
+    describe "Run a Standalone AM with the Chaos monkey set to kill it"
+    // patch the configuration for AM restart
+    int threshold = 2;
+    YarnConfiguration conf = getRestartableConfiguration(threshold)
+
+    String clustername = createMiniCluster("", conf, 1, true)
+    ServiceLauncher<SliderClient> launcher =
+        createStandaloneAMWithArgs(clustername,
+            [
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_ENABLED, "true",
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "8",
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "75000",
+            ],
+            true,
+            false)
+    SliderClient sliderClient = launcher.service
+    addToTeardown(sliderClient);
+
+    ApplicationReport report
+    report = waitForClusterLive(sliderClient, 30000)
+    describe "Waiting for the cluster to fail"
+    sleep(40000)
+    // end of process
+    report = sliderClient.applicationReport
+    log.info(report.diagnostics)
+    assert report.currentApplicationAttemptId.attemptId == threshold
+    assert YarnApplicationState.FAILED == report.yarnApplicationState  
+    assert FinalApplicationStatus.FAILED == report.finalApplicationStatus
+  }
+
+  /**
+   * Get a restartable configuration
+   * @param restarts
+   * @return
+   */
+  public YarnConfiguration getRestartableConfiguration(int restarts) {
+    def conf = new YarnConfiguration(configuration)
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, restarts)
+    conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, restarts)
+    conf
+  }
+
+
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
new file mode 100644
index 0000000..8d9318a
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.slider.agent.standalone
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.ApplicationReport
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.client.SliderClient
+import org.apache.slider.common.SliderXmlConfKeys
+import org.apache.slider.common.params.ActionAMSuicideArgs
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Test
+
+/**
+ * Kill a standalone AM and verify that YARN restarts it while the
+ * configured AM restart limit has not been exceeded.
+ */
+@CompileStatic
+@Slf4j
+
+class TestStandaloneAMRestart extends AgentMiniClusterTestBase {
+
+
+  @Test
+  public void testRestartStandaloneAM() throws Throwable {
+    describe "kill a Standalone AM and verify that it restarts"
+    // patch the configuration for AM restart
+    YarnConfiguration conf = getRestartableConfiguration(5)
+
+    String clustername = createMiniCluster("", conf, 1, true)
+    ServiceLauncher<SliderClient> launcher =
+        createStandaloneAMWithArgs(clustername,
+            [
+                Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, "4"
+            ],
+            true,
+            false)
+    SliderClient sliderClient = launcher.service
+    addToTeardown(sliderClient);
+
+    ApplicationReport report = waitForClusterLive(sliderClient)
+    logReport(report)
+    waitUntilClusterLive(sliderClient, 30000)
+
+
+    ActionAMSuicideArgs args = new ActionAMSuicideArgs()
+    args.message = "test AM iteration"
+    args.waittime = 100
+    args.exitcode = 1
+    sliderClient.actionAmSuicide(clustername, args)
+    waitWhileClusterLive(sliderClient);
+    //give yarn some time to notice
+    sleep(20000)
+    waitUntilClusterLive(sliderClient, 40000)
+
+
+    // app should be running here
+    assert 0 == sliderClient.actionExists(clustername, true)
+    
+    
+    clusterActionFreeze(sliderClient, clustername)
+  }
+
+  /**
+   * Get a restartable configuration
+   * @param restarts
+   * @return
+   */
+  public YarnConfiguration getRestartableConfiguration(int restarts) {
+    def conf = new YarnConfiguration(configuration)
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, restarts)
+    conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, restarts)
+    conf
+  }
+
+
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
index bce24e6..73e9b07 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAgentAM.groovy
@@ -37,17 +37,16 @@
 @CompileStatic
 @Slf4j
 class TestStandaloneAgentAM  extends AgentMiniClusterTestBase {
+  
   @Test
   public void testStandaloneAgentAM() throws Throwable {
 
-
     describe "create a masterless AM then get the service and look it up via the AM"
 
     //launch fake master
-    String clustername = "test_standalone_agent_am"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
     ServiceLauncher<SliderClient> launcher =
-        createMasterlessAM(clustername, 0, true, false)
+        createStandaloneAM(clustername, true, false)
     SliderClient client = launcher.service
     addToTeardown(client);
 
@@ -124,7 +123,7 @@
     assert oldInstance.yarnApplicationState >= YarnApplicationState.FINISHED
 
     //create another AM
-    launcher = createMasterlessAM(clustername, 0, true, true)
+    launcher = createStandaloneAM(clustername, true, true)
     client = launcher.service
     ApplicationId i2AppID = client.applicationId
 
@@ -141,7 +140,7 @@
     describe("attempting to create instance #3")
     //now try to create instance #3, and expect an in-use failure
     try {
-      createMasterlessAM(clustername, 0, false, true)
+      createStandaloneAM(clustername, false, true)
       fail("expected a failure, got a masterless AM")
     } catch (SliderException e) {
       assertFailureClusterInUse(e);
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
index b2cb670..cf97520 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneBadClusterName.groovy
@@ -38,7 +38,7 @@
     describe "verify that bad cluster names are picked up"
 
     try {
-      addToTeardown(createMasterlessAM(clustername, 0, true, false).service);
+      addToTeardown(createStandaloneAM(clustername, true, false).service);
       fail("expected a failure")
     } catch (ServiceLaunchException e) {
       assertExceptionDetails(e, LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR)
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
index cb55624..b8a590e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneRegistryAM.groovy
@@ -62,10 +62,9 @@
     describe "create a masterless AM then perform registry operations on it"
 
     //launch fake master
-    String clustername = "test_standalone_registry_am"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster(configuration, 1, true)
     ServiceLauncher<SliderClient> launcher
-    launcher = createMasterlessAM(clustername, 0, true, false)
+    launcher = createStandaloneAM(clustername, true, false)
     SliderClient client = launcher.service
     addToTeardown(client);
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy b/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
index 12e4a43..d94cd54 100644
--- a/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/client/TestCommonArgParsing.groovy
@@ -391,6 +391,19 @@
   }
   
   @Test
+  public void testArgUpdate() throws Throwable {
+    def ca = createClientArgs([
+        ACTION_UPDATE, 'cluster1',
+        ARG_TEMPLATE, "appConfig.json",
+    ])
+    assert ca.action == ACTION_UPDATE
+    assert ca.coreAction instanceof ActionUpdateArgs
+    assert ca.actionUpdateArgs instanceof ActionUpdateArgs
+    AbstractClusterBuildingActionArgs args = ca.actionUpdateArgs
+    assert args.template != null
+  }
+  
+  @Test
   public void testFlexArgs() throws Throwable {
     def ca = createClientArgs([
         ACTION_FLEX, 'cluster1',
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
index 9b4c377..6dee64f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/AgentTestBase.groovy
@@ -140,6 +140,36 @@
         clusterOps)
   }
 
+  /**
+   * Update an agent cluster
+   * @param clustername
+   * @param roles
+   * @param extraArgs
+   * @param deleteExistingData
+   * @return the cluster launcher
+   */
+  public ServiceLauncher<SliderClient> updateAgentCluster(
+      String clustername,
+      Map<String, Integer> roles,
+      List<String> extraArgs,
+      boolean deleteExistingData) {
+
+    YarnConfiguration conf = testConfiguration
+
+    def clusterOps = [
+        :
+    ]
+
+    return createOrBuildCluster(
+        SliderActions.ACTION_UPDATE,
+        clustername,
+        roles,
+        extraArgs,
+        deleteExistingData,
+        false,
+        clusterOps)
+  }
+
   public String getApplicationHome() {
     return "/"
   }
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
index 7d68458..e248ec3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentAMManagementWS.groovy
@@ -27,22 +27,24 @@
 import org.apache.slider.common.SliderKeys
 import org.apache.slider.core.conf.MapOperations
 import org.apache.slider.core.main.ServiceLauncher
-import org.apache.slider.server.appmaster.web.SliderAMWebApp
 import org.apache.slider.server.appmaster.web.rest.agent.RegistrationResponse
 import org.apache.slider.server.appmaster.web.rest.agent.RegistrationStatus
 import org.apache.slider.server.services.security.CertificateManager
 import org.apache.slider.server.services.security.SecurityUtils
+import org.junit.Before
 import org.junit.Test
 import org.slf4j.Logger
 import org.slf4j.LoggerFactory
 
+import javax.net.ssl.HostnameVerifier
+import javax.net.ssl.HttpsURLConnection
+import javax.net.ssl.SSLSession
 import javax.ws.rs.core.MediaType
 
 import static org.apache.slider.common.params.Arguments.ARG_OPTION
 import static org.apache.slider.providers.agent.AgentKeys.*
 import static org.apache.slider.providers.agent.AgentTestUtils.createDummyJSONRegister
 import static org.apache.slider.providers.agent.AgentTestUtils.createTestClient
-import static org.apache.slider.test.SliderTestUtils.log
 
 @CompileStatic
 @Slf4j
@@ -52,10 +54,10 @@
     final static Logger logger = LoggerFactory.getLogger(TestAgentAMManagementWS.class)
     static {
         //for localhost testing only
-        javax.net.ssl.HttpsURLConnection.setDefaultHostnameVerifier(
-                new javax.net.ssl.HostnameVerifier(){
+        HttpsURLConnection.setDefaultHostnameVerifier(
+                new HostnameVerifier(){
                     public boolean verify(String hostname,
-                                          javax.net.ssl.SSLSession sslSession) {
+                                          SSLSession sslSession) {
                         logger.info("verifying hostname ${hostname}")
                         InetAddress[] addresses =
                             InetAddress.getAllByName(hostname);
@@ -73,6 +75,12 @@
                     }
                 });
 
+    }
+
+    @Override
+    @Before
+    void setup() {
+        super.setup()
         MapOperations compOperations = new MapOperations();
         compOperations.put(SliderKeys.KEYSTORE_LOCATION, "/tmp/work/security/keystore.p12");
         SecurityUtils.initializeSecurityParameters(compOperations);
@@ -83,13 +91,12 @@
         System.setProperty("javax.net.ssl.trustStore", keystoreFile);
         System.setProperty("javax.net.ssl.trustStorePassword", password);
         System.setProperty("javax.net.ssl.trustStoreType", "PKCS12");
+
     }
 
-  @Test
+    @Test
   public void testAgentAMManagementWS() throws Throwable {
-    def clustername = "test_agentammanagementws"
-    createMiniCluster(
-        clustername,
+      String clustername = createMiniCluster("",
         configuration,
         1,
         1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
index 2f03b09..a29c8cb 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestAgentEcho.groovy
@@ -43,9 +43,7 @@
 
   @Test
   public void testEchoOperation() throws Throwable {
-    def clustername = "test_agent_echo"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster("",
         configuration,
         1,
         1,
@@ -68,7 +66,7 @@
 
     def role = "echo"
     Map<String, Integer> roles = [
-        (role): 1,
+        (role): 2,
     ];
     ServiceLauncher<SliderClient> launcher = buildAgentCluster(clustername,
         roles,
diff --git a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
index 421920f..99f7f49 100644
--- a/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/providers/agent/TestBuildBasicAgent.groovy
@@ -69,9 +69,8 @@
   @Test
   public void testBuildMultipleRoles() throws Throwable {
 
-    def clustername = "test_build_basic_agent"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
@@ -223,7 +222,98 @@
         true, false,
         false)
   }
+  
+  @Test
+  public void testUpdateBasicAgent() throws Throwable {
 
+    String clustername = createMiniCluster(
+        "",
+        configuration,
+        1,
+        1,
+        1,
+        true,
+        false)
+    
+    def master = "hbase-master"
+    def rs = "hbase-rs"
+    ServiceLauncher<SliderClient> launcher = buildAgentCluster(clustername,
+        [
+            (ROLE_NODE): 5,
+            (master): 1,
+            (rs): 5
+        ],
+        [
+            ARG_OPTION, CONTROLLER_URL, "http://localhost",
+            ARG_OPTION, PACKAGE_PATH, ".",
+            ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+            ARG_OPTION, AGENT_CONF, "file://" + getAgentConf().absolutePath,
+            ARG_COMP_OPT, master, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_COMP_OPT, rs, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "2",
+            ARG_RES_COMP_OPT, rs, ResourceKeys.COMPONENT_PRIORITY, "3",
+            ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+            ARG_COMP_OPT, rs, SERVICE_NAME, "HBASE",
+            ARG_COMP_OPT, master, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+            ARG_COMP_OPT, rs, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+            ARG_COMP_OPT, ROLE_NODE, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_RES_COMP_OPT, ROLE_NODE, ResourceKeys.COMPONENT_PRIORITY, "1",
+        ],
+        true, false,
+        false)
+    def instanceD = launcher.service.loadPersistedClusterDescription(
+        clustername)
+    dumpClusterDescription("$clustername:", instanceD)
+    def resource = instanceD.getResourceOperations()
+
+    def agentComponent = resource.getMandatoryComponent(ROLE_NODE)
+    agentComponent.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+    def masterC = resource.getMandatoryComponent(master)
+    assert "2" == masterC.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+    def rscomponent = resource.getMandatoryComponent(rs)
+    assert "5" == rscomponent.getMandatoryOption(ResourceKeys.COMPONENT_INSTANCES)
+
+    // change master priority and rs instances through update action
+    ServiceLauncher<SliderClient> launcher2 = updateAgentCluster(clustername,
+        [
+            (ROLE_NODE): 5,
+            (master): 1,
+            (rs): 6
+        ],
+        [
+            ARG_OPTION, CONTROLLER_URL, "http://localhost",
+            ARG_OPTION, PACKAGE_PATH, ".",
+            ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+            ARG_OPTION, AGENT_CONF, "file://" + getAgentConf().absolutePath,
+            ARG_COMP_OPT, master, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_COMP_OPT, rs, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_RES_COMP_OPT, master, ResourceKeys.COMPONENT_PRIORITY, "4",
+            ARG_RES_COMP_OPT, rs, ResourceKeys.COMPONENT_PRIORITY, "3",
+            ARG_COMP_OPT, master, SERVICE_NAME, "HBASE",
+            ARG_COMP_OPT, rs, SERVICE_NAME, "HBASE",
+            ARG_COMP_OPT, master, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+            ARG_COMP_OPT, rs, AgentKeys.APP_HOME, "/share/hbase/hbase-0.96.1-hadoop2",
+            ARG_COMP_OPT, ROLE_NODE, SCRIPT_PATH, "agent/scripts/agent.py",
+            ARG_RES_COMP_OPT, ROLE_NODE, ResourceKeys.COMPONENT_PRIORITY, "1",
+        ],
+        true)
+    def instanceDef = launcher.service.loadPersistedClusterDescription(
+        clustername)
+    dumpClusterDescription("$clustername:", instanceDef)
+    def resource2 = instanceDef.getResourceOperations()
+
+    def agentComponent2 = resource2.getMandatoryComponent(ROLE_NODE)
+    agentComponent2.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+    def masterC2 = resource2.getMandatoryComponent(master)
+    assert "4" == masterC2.getMandatoryOption(ResourceKeys.COMPONENT_PRIORITY)
+
+    def rscomponent2 = resource2.getMandatoryComponent(rs)
+    assert "6" == rscomponent2.getMandatoryOption(ResourceKeys.COMPONENT_INSTANCES)
+  }
+  
   public AggregateConf loadInstanceDefinition(String name) {
     def cluster4
     def sliderFS = createSliderFileSystem()
@@ -235,10 +325,9 @@
   }
 
   @Test
-  public void testAgentArgs() throws Throwable {
-    def clustername = "test_good_agent_args"
-    createMiniCluster(
-        clustername,
+  public void testGoodAgentArgs() throws Throwable {
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
@@ -253,7 +342,7 @@
           [
               ARG_OPTION, CONTROLLER_URL, "http://localhost",
               ARG_PACKAGE, ".",
-              ARG_OPTION, APP_DEF, "file://" + getAppDef().absolutePath,
+              ARG_OPTION, APP_DEF, "file://" + appDef.absolutePath,
               ARG_RESOURCES, TEST_FILES + "good/resources.json",
               ARG_TEMPLATE, TEST_FILES + "good/appconf.json"
           ],
@@ -268,9 +357,8 @@
   
   @Test
   public void testBadAgentArgs() throws Throwable {
-    def clustername = "test_bad_agent_args"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
@@ -360,10 +448,8 @@
   @Test
   public void testTemplateArgs() throws Throwable {
 
-
-    def clustername = "test_build_template_args"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
@@ -388,10 +474,8 @@
   @Test
   public void testBadTemplates() throws Throwable {
 
-
-    def clustername = "test_bad_template_args"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy b/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
index 4cc0f08..1a1e5aa 100644
--- a/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/registry/curator/TestRegistryRestResources.groovy
@@ -50,6 +50,7 @@
 
   public static final String REGISTRY_URI = RestPaths.SLIDER_PATH_REGISTRY;
   public static final String WADL = "vnd.sun.wadl+xml"
+  public static final String CLUSTERNAME = "testregistryws"
 
 
   private String id(String instanceName) {
@@ -64,7 +65,7 @@
 
   @Test
   public void testRestURIs() throws Throwable {
-    def clustername = "test_registryws"
+    def clustername = CLUSTERNAME
     createMiniCluster(
         clustername,
         configuration,
@@ -143,7 +144,8 @@
 
     webResource = client.resource(
         appendToURL(registry_url,
-            "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/"+id("test_registryws")));
+            "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/"+id(
+                clustername)));
     service = webResource.type(MediaType.APPLICATION_JSON)
               .get(CuratorServiceInstance.class);
     validateService(service)
@@ -164,7 +166,7 @@
 
     try {
       webResource = client.resource(appendToURL(registry_url,
-          "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/test_registryws-99"));
+          "${RestPaths.REGISTRY_SERVICE}/${SliderKeys.APP_TYPE}/testregistryws99"));
       
       service = webResource.type(MediaType.APPLICATION_JSON)
                            .get(CuratorServiceInstance.class);
@@ -188,6 +190,6 @@
   private void validateService(CuratorServiceInstance service) {
     assert service.name.equals(SliderKeys.APP_TYPE)
     assert service.serviceType == ServiceType.DYNAMIC
-    assert service.id.contains("test_registryws")
+    assert service.id.contains(CLUSTERNAME)
   }
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy
new file mode 100644
index 0000000..7e03e7b
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/actions/TestActions.groovy
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions
+
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.service.ServiceOperations
+import org.apache.slider.server.appmaster.SliderAppMaster
+import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.services.workflow.ServiceThreadFactory
+import org.apache.slider.server.services.workflow.WorkflowExecutorService
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicBoolean
+import java.util.concurrent.atomic.AtomicLong
+
+@Slf4j
+//@CompileStatic
+class TestActions {
+
+  QueueService queues;
+  WorkflowExecutorService<ExecutorService> executorService;
+
+
+  @Before
+  void createService() {
+    queues = new QueueService();
+
+    def conf = new Configuration()
+    queues.init(conf)
+
+    queues.start();
+
+    executorService = new WorkflowExecutorService<>("AmExecutor",
+        Executors.newCachedThreadPool(
+            new ServiceThreadFactory("AmExecutor", true)));
+
+    executorService.init(conf)
+    executorService.start();
+  }
+
+  @After
+  void destroyService() {
+    ServiceOperations.stop(executorService);
+    ServiceOperations.stop(queues);
+  }
+
+  @Test
+  public void testBasicService() throws Throwable {
+    queues.start();
+  }
+
+  @Test
+  public void testDelayLogic() throws Throwable {
+    ActionNoteExecuted action = new ActionNoteExecuted("", 1000)
+    long now = System.currentTimeMillis();
+
+    def delay = action.getDelay(TimeUnit.MILLISECONDS)
+    assert delay >= 800
+    assert delay <= 1800
+
+    ActionNoteExecuted a2 = new ActionNoteExecuted("a2", 10000)
+    assert action.compareTo(a2) < 0
+    assert a2.compareTo(action) > 0
+    assert action.compareTo(action)== 0
+    
+  }
+
+  @Test
+  public void testActionDelayedExecutorTermination() throws Throwable {
+    long start = System.currentTimeMillis()
+    
+    ActionStopQueue stopAction = new ActionStopQueue(1000);
+    queues.scheduledActions.add(stopAction);
+    queues.run();
+    AsyncAction take = queues.actionQueue.take();
+    assert take == stopAction
+    long stop = System.currentTimeMillis();
+    assert stop - start > 500
+    assert stop - start < 1500
+  }
+
+  @Test
+  public void testImmediateQueue() throws Throwable {
+    ActionNoteExecuted noteExecuted = new ActionNoteExecuted("executed", 0)
+    queues.put(noteExecuted)
+    queues.put(new ActionStopQueue(0))
+    QueueExecutor ex = new QueueExecutor(queues)
+    ex.run();
+    assert queues.actionQueue.empty
+    assert noteExecuted.executed.get()
+  }
+
+  @Test
+  public void testActionOrdering() throws Throwable {
+
+    ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+    def stop = new ActionStopQueue(1500)
+    ActionNoteExecuted note2 = new ActionNoteExecuted("note2", 800)
+
+    List<AsyncAction> actions = [note1, stop, note2]
+    Collections.sort(actions)
+    assert actions[0] == note1
+    assert actions[1] == note2
+    assert actions[2] == stop
+  }
+  
+  @Test
+  public void testDelayedQueueWithReschedule() throws Throwable {
+    
+    ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+    def stop = new ActionStopQueue(1500)
+    ActionNoteExecuted note2 = new ActionNoteExecuted("note2", 800)
+    
+    assert note2.compareTo(stop) < 0
+    assert note1.nanos < note2.nanos
+    assert note2.nanos < stop.nanos
+    queues.schedule(note1)
+    queues.schedule(note2)
+    queues.schedule(stop)
+    // async to sync expected to run in order
+    runQueuesToCompletion()
+    assert note1.executed.get()
+    assert note2.executed.get()
+  }
+
+  public void runQueuesToCompletion() {
+    queues.run();
+    assert queues.scheduledActions.empty
+    assert !queues.actionQueue.empty
+    QueueExecutor ex = new QueueExecutor(queues)
+    ex.run();
+    // flush all stop commands from the queue
+    queues.flushActionQueue(ActionStopQueue.class)
+    
+    assert queues.actionQueue.empty
+  }
+
+  @Test
+  public void testRenewedActionFiresOnceAtLeast() throws Throwable {
+    ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+    RenewingAction renewer = new RenewingAction(
+        note1,
+        500,
+        100,
+        TimeUnit.MILLISECONDS,
+        3)
+    queues.schedule(renewer);
+    def stop = new ActionStopQueue(4, TimeUnit.SECONDS)
+    queues.schedule(stop);
+    // this runs all the delayed actions FIRST, so can't be used
+    // to play tricks of renewing actions ahead of the stop action
+    runQueuesToCompletion()
+    assert renewer.executionCount == 1
+    assert note1.executionCount == 1
+    // assert the renewed item is back in
+    assert queues.scheduledActions.contains(renewer)
+  }
+
+
+  @Test
+  public void testRenewingActionOperations() throws Throwable {
+    ActionNoteExecuted note1 = new ActionNoteExecuted("note1", 500)
+    RenewingAction renewer = new RenewingAction(
+        note1,
+        100,
+        100,
+        TimeUnit.MILLISECONDS,
+        3)
+    queues.renewing("note", renewer)
+    assert queues.removeRenewingAction("note")
+    queues.stop()
+    queues.waitForServiceToStop(10000)
+  }
+  
+  public class ActionNoteExecuted extends AsyncAction {
+    public final AtomicBoolean executed = new AtomicBoolean(false);
+    public final AtomicLong executionTimeNanos = new AtomicLong()
+    private final AtomicLong executionCount = new AtomicLong()
+
+    public ActionNoteExecuted(String text, int delay) {
+      super(text, delay);
+    }
+
+    @Override
+    public void execute(
+        SliderAppMaster appMaster,
+        QueueAccess queueService,
+        AppState appState) throws Exception {
+      log.info("Executing $name");
+      executed.set(true);
+      executionTimeNanos.set(System.nanoTime())
+      executionCount.incrementAndGet()
+      log.info(this.toString())
+      
+      synchronized (this) {
+        this.notify();
+      }
+    }
+
+    @Override
+    String toString() {
+      return super.toString() +
+             " executed=${executed.get()}; count=${executionCount.get()};"
+    }
+
+    long getExecutionCount() {
+      return executionCount.get()
+    }
+  }
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
similarity index 77%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index 9c17763..068b876 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -21,8 +21,12 @@
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.ContainerId
+import org.apache.slider.api.ResourceKeys
+import org.apache.slider.core.conf.AggregateConf
+import org.apache.slider.core.conf.MapOperations
 import org.apache.slider.core.exceptions.SliderException
 import org.apache.slider.core.exceptions.TriggerClusterTeardownException
+import org.apache.slider.server.appmaster.actions.ResetFailureWindow
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
@@ -34,12 +38,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestAppStateContainerFailure extends BaseMockAppStateTest
+class TestMockAppStateContainerFailure extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateContainerFailure"
+    return "TestMockAppStateContainerFailure"
   }
 
   /**
@@ -52,6 +56,15 @@
     return new MockYarnEngine(8000, 4)
   }
 
+  @Override
+  AggregateConf buildInstanceDefinition() {
+    def aggregateConf = super.buildInstanceDefinition()
+    def globalOptions = aggregateConf.resourceOperations.globalOptions
+    globalOptions.put(ResourceKeys.CONTAINER_FAILURE_THRESHOLD, "10")
+    
+    return aggregateConf
+  }
+
   @Test
   public void testShortLivedFail() throws Throwable {
 
@@ -153,7 +166,7 @@
         ContainerId cid = ids[0]
         log.info("$i instance $instances[0] $cid")
         assert cid 
-        appState.onNodeManagerContainerStartFailed(cid, new SliderException("oops"))
+        appState.onNodeManagerContainerStartFailed(cid, new SliderException("failure #${i}"))
         AppState.NodeCompletionResult result = appState.onCompletedNode(containerStatus(cid))
         assert result.containerFailed
       }
@@ -163,4 +176,33 @@
     }
   }
 
+
+  @Test
+  public void testFailureWindow() throws Throwable {
+
+    ResetFailureWindow resetter = new ResetFailureWindow();
+
+    // initial reset
+    resetter.execute(null, null, appState)
+    
+    role0Status.desired = 1
+      for (int i = 0; i < 100; i++) {
+        resetter.execute(null, null, appState)
+        List<RoleInstance> instances = createAndSubmitNodes()
+        assert instances.size() == 1
+
+        List<ContainerId> ids = extractContainerIds(instances, 0)
+
+        ContainerId cid = ids[0]
+        log.info("$i instance $instances[0] $cid")
+        assert cid
+        appState.onNodeManagerContainerStartFailed(
+            cid,
+            new SliderException("failure #${i}"))
+        AppState.NodeCompletionResult result = appState.onCompletedNode(
+            containerStatus(cid))
+        assert result.containerFailed
+      }
+  }
+
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
similarity index 88%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
index 6e387d8..136e1ea 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateDynamicRoles.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateDynamicRoles.groovy
@@ -26,9 +26,10 @@
 import org.apache.slider.server.appmaster.model.mock.MockRecordFactory
 import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.AppState
 import org.apache.slider.server.appmaster.state.RoleInstance
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector
 import org.junit.Test
 
 /**
@@ -36,12 +37,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestAppStateDynamicRoles extends BaseMockAppStateTest
+class TestMockAppStateDynamicRoles extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateDynamicRoles"
+    return "TestMockAppStateDynamicRoles"
   }
 
   /**
@@ -72,11 +73,13 @@
     
     appState.buildInstance(
         instance,
+        new Configuration(),
         new Configuration(false),
         factory.ROLES,
         fs,
         historyPath,
-        null, null)
+        null,
+        null, new SimpleReleaseSelector())
   }
 
   @Test
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
similarity index 94%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
index 1693365..5c9dce9 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestFlexDynamicRoles.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexDynamicRoles.groovy
@@ -30,6 +30,7 @@
 import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
 import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector
 import org.apache.slider.server.avro.RoleHistoryWriter
 import org.junit.Test
 
@@ -38,12 +39,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestFlexDynamicRoles extends BaseMockAppStateTest
+class TestMockAppStateFlexDynamicRoles extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateDynamicRoles"
+    return "TestMockAppStateFlexDynamicRoles"
   }
 
   /**
@@ -73,11 +74,12 @@
 
     
     appState.buildInstance(instance,
+        new Configuration(),
         new Configuration(false),
         factory.ROLES,
         fs,
         historyPath,
-        null, null)
+        null, null, new SimpleReleaseSelector())
   }
 
   
@@ -176,11 +178,12 @@
     appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES)
     appState.buildInstance(
         factory.newInstanceDefinition(0, 0, 0),
+        new Configuration(),
         new Configuration(false),
         factory.ROLES,
         fs,
         historyPath2,
-        null, null)
+        null, null, new SimpleReleaseSelector())
     historyWriter.read(fs, history, appState.roleHistory)
   }
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
new file mode 100644
index 0000000..a7bf068
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateFlexing.groovy
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.model.appstate
+
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.Container
+import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.state.AppState
+import org.apache.slider.server.appmaster.state.ContainerAssignment
+import org.apache.slider.server.appmaster.state.RoleInstance
+import org.junit.Test
+
+@Slf4j
+class TestMockAppStateFlexing extends BaseMockAppStateTest implements MockRoles {
+
+  @Override
+  String getTestName() {
+    return "TestMockAppStateFlexing"
+  }
+
+  @Test
+  public void testFlexDuringLaunchPhase() throws Throwable {
+    role0Status.desired = 1
+
+    List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
+    List<Container> allocations = engine.execute(ops)
+    List<ContainerAssignment> assignments = [];
+    List<AbstractRMOperation> releases = []
+    appState.onContainersAllocated(allocations, assignments, releases)
+    assert assignments.size() == 1
+    ContainerAssignment assigned = assignments[0]
+    Container target = assigned.container
+    RoleInstance ri = roleInstance(assigned)
+
+    ops = appState.reviewRequestAndReleaseNodes()
+    assert ops.empty
+
+    //now this is the start point.
+    appState.containerStartSubmitted(target, ri);
+
+    ops = appState.reviewRequestAndReleaseNodes()
+    assert ops.empty
+
+    RoleInstance ri2 = appState.innerOnNodeManagerContainerStarted(target.id)
+  }
+
+  @Test
+  public void testFlexBeforeAllocationPhase() throws Throwable {
+    role0Status.desired = 1
+
+    List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
+    assert !ops.empty
+    List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
+    assert ops2.empty
+  }
+
+
+  @Test
+  public void testFlexDownTwice() throws Throwable {
+    int r0 = 6
+    int r1 = 0
+    int r2 = 0
+    role0Status.desired = r0
+    role1Status.desired = r1
+    role2Status.desired = r2
+    List<RoleInstance> instances = createAndStartNodes()
+
+    int clusterSize = r0 + r1 + r2
+    assert instances.size() == clusterSize
+    log.info("shrinking cluster")
+    r0 = 4
+    role0Status.desired = r0
+    List<AppState.NodeCompletionResult> completionResults = []
+    instances = createStartAndStopNodes(completionResults)
+    assert instances.size() == 0
+    // assert two nodes were released
+    assert completionResults.size() == 2
+
+    // no-op review
+    completionResults = []
+    instances = createStartAndStopNodes(completionResults)
+    assert instances.size() == 0
+    // assert two nodes were released
+    assert completionResults.size() == 0
+    
+    
+    // now shrink again
+    role0Status.desired = r0 = 1
+    completionResults = []
+    instances = createStartAndStopNodes(completionResults)
+    assert instances.size() == 0
+    // assert two nodes were released
+    assert completionResults.size() == 3
+
+  }
+  
+  
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
similarity index 83%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
index 168ac9f..f8e852e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockRMOperations.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRMOperations.groovy
@@ -25,6 +25,10 @@
 import org.apache.slider.server.appmaster.model.mock.MockFactory
 import org.apache.slider.server.appmaster.model.mock.MockRMOperationHandler
 import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.RMOperationHandler
 import org.apache.slider.server.appmaster.state.*
 import org.junit.Test
 
@@ -32,11 +36,11 @@
 import static org.apache.slider.server.appmaster.state.ContainerPriority.extractRole
 
 @Slf4j
-class TestMockRMOperations extends BaseMockAppStateTest implements MockRoles {
+class TestMockAppStateRMOperations extends BaseMockAppStateTest implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestMockRMOperations"
+    return "TestMockAppStateRMOperations"
   }
 
   @Test
@@ -173,41 +177,4 @@
     assert ri3 == null
   }
 
-  @Test
-  public void testFlexDuringLaunchPhase() throws Throwable {
-    role0Status.desired = 1
-
-    List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
-    List<Container> allocations = engine.execute(
-        ops)
-    List<ContainerAssignment> assignments = [];
-    List<AbstractRMOperation> releases = []
-    appState.onContainersAllocated(allocations, assignments, releases)
-    assert assignments.size() == 1
-    ContainerAssignment assigned = assignments[0]
-    Container target = assigned.container
-    RoleInstance ri = roleInstance(assigned)
-
-    ops = appState.reviewRequestAndReleaseNodes()
-    assert ops.empty
-
-    //now this is the start point.
-    appState.containerStartSubmitted(target, ri);
-
-    ops = appState.reviewRequestAndReleaseNodes()
-    assert ops.empty
-
-    RoleInstance ri2 = appState.innerOnNodeManagerContainerStarted(target.id)
-  }
-
-  @Test
-  public void testFlexBeforeAllocationPhase() throws Throwable {
-    role0Status.desired = 1
-
-    List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
-    assert !ops.empty
-    List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
-    assert ops2.empty
-  }
-
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
index 190e927..c2783f3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRebuildOnAMRestart.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRebuildOnAMRestart.groovy
@@ -26,6 +26,7 @@
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockRecordFactory
 import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.*
 import org.junit.Test
 
@@ -34,12 +35,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestAppStateRebuildOnAMRestart extends BaseMockAppStateTest
+class TestMockAppStateRebuildOnAMRestart extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateRebuildOnAMRestart"
+    return "TestMockAppStateRebuildOnAMRestart"
   }
 
   @Test
@@ -71,11 +72,13 @@
     //and rebuild
     appState.buildInstance(
         factory.newInstanceDefinition(r0, r1, r2),
+        new Configuration(),
         new Configuration(false),
         factory.ROLES,
         fs,
         historyPath,
-        containers, null)
+        containers,
+        null, new SimpleReleaseSelector())
 
     assert appState.getStartedCountainerCount() == clusterSize
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
similarity index 91%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
index fba1ea0..17ebc31 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRolePlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRolePlacement.groovy
@@ -24,6 +24,9 @@
 import org.apache.hadoop.yarn.client.api.AMRMClient
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
 import org.apache.slider.server.appmaster.state.*
 import org.junit.Test
 
@@ -35,12 +38,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestAppStateRolePlacement extends BaseMockAppStateTest
+class TestMockAppStateRolePlacement extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateRolePlacement"
+    return "TestMockAppStateRolePlacement"
   }
 
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
index f087a30..addfaa5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestAppStateRoleRelease.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateRoleRelease.groovy
@@ -24,7 +24,7 @@
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.RoleInstance
 import org.junit.Test
 
@@ -33,12 +33,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestAppStateRoleRelease extends BaseMockAppStateTest
+class TestMockAppStateRoleRelease extends BaseMockAppStateTest
     implements MockRoles {
 
   @Override
   String getTestName() {
-    return "TestAppStateRolePlacement"
+    return "TestMockAppStateRoleRelease"
   }
 
   /**
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
similarity index 93%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
index a0b1100..73d40ee 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestContainerResourceAllocations.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockContainerResourceAllocations.groovy
@@ -26,8 +26,8 @@
 import org.apache.slider.core.conf.ConfTreeOperations
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockRoles
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
 import org.junit.Test
 
 /**
@@ -35,11 +35,11 @@
  */
 @CompileStatic
 @Slf4j
-class TestContainerResourceAllocations extends BaseMockAppStateTest {
+class TestMockContainerResourceAllocations extends BaseMockAppStateTest {
 
   @Override
   String getTestName() {
-    "TestContainerResourceAllocations"
+    "TestMockContainerResourceAllocations"
   }
 
   @Test
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy
deleted file mode 100644
index 92915dd..0000000
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForRelease.groovy
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.server.appmaster.model.history
-
-import groovy.transform.CompileStatic
-import groovy.util.logging.Slf4j
-import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
-import org.apache.slider.server.appmaster.model.mock.MockFactory
-import org.apache.slider.server.appmaster.state.NodeInstance
-import org.apache.slider.server.appmaster.state.NodeMap
-import org.junit.Before
-import org.junit.Test
-
-@Slf4j
-@CompileStatic
-class TestFindNodesForRelease extends BaseMockAppStateTest {
-
-
-  @Override
-  String getTestName() {
-    return "TestFindNodesForRelease"
-  }
-  NodeInstance age1Active4 = nodeInstance(1, 4, 0, 0)
-  NodeInstance age2Active2 = nodeInstance(2, 2, 0, 0)
-  NodeInstance age3Active0 = nodeInstance(3, 0, 0, 0)
-  NodeInstance age4Active1 = nodeInstance(4, 1, 0, 0)
-  NodeInstance empty = new NodeInstance("empty", MockFactory.ROLE_COUNT)
-
-  List<NodeInstance> nodes = [age2Active2, age4Active1, age1Active4, age3Active0]
-  NodeMap nodeMap = new NodeMap(MockFactory.ROLE_COUNT);
-
-
-  @Before
-  public void setupNodeMap() {
-    nodeMap.insert(nodes)
-  }
-
-  private void assertReleased(
-      int count,
-      List<NodeInstance> expected,
-      int role = 0) {
-    List<NodeInstance> released = nodeMap.findNodesForRelease(role, count)
-    assertListEquals(released, expected)
-  }
-  private void assertReleased(
-      List<NodeInstance> expected,
-      int role = 0) {
-    List<NodeInstance> released = nodeMap.findNodesForRelease(role, expected.size())
-    assertListEquals(released, expected)
-  }
-
-  @Test
-  public void testListActiveNodes() throws Throwable {
-    assertListEquals(nodeMap.listActiveNodes(0),
-                     [age1Active4,age2Active2, age4Active1])
-  }
-  
-  @Test
-  public void testReleaseMinus1() throws Throwable {
-    try {
-      nodeMap.findNodesForRelease(0, -1)
-      fail("Expected an exception")
-    } catch (IllegalArgumentException e) {
-    }
-  }  
-  @Test
-  public void testReleaseO() throws Throwable {
-    assertReleased(0, [])
-  }
-
-  @Test
-  public void testRelease1() throws Throwable {
-    assertReleased(1, [age1Active4])
-  }
-
-  @Test
-  public void testRelease2() throws Throwable {
-    assertReleased(2, [age1Active4, age1Active4])
-  }
-
-  @Test
-  public void testRelease3() throws Throwable {
-    assertReleased(3, [age1Active4, age1Active4, age1Active4 ])
-  }
-
-  @Test
-  public void testRelease4() throws Throwable {
-    assertReleased(4, [age1Active4, age1Active4, age1Active4 , age2Active2])
-  }
-
-  @Test
-  public void testRelease5() throws Throwable {
-    assertReleased([age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1])
-  }
-
-  @Test
-  public void testRelease6() throws Throwable {
-    assertReleased(
-           [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1, age1Active4])
-  }
-
-  @Test
-  public void testRelease7() throws Throwable {
-    assertReleased(
-           [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1,
-               age1Active4, age2Active2])
-  }
-
-  @Test
-  public void testRelease8() throws Throwable {
-    assertReleased(8,
-           [age1Active4, age1Active4, age1Active4 , age2Active2, age4Active1,
-               age1Active4, age2Active2])
-  }
-
-  @Test
-  public void testPurgeInactiveTime3() throws Throwable {
-    assert nodeMap.purgeUnusedEntries(3) == 0;
-  }
-
-  @Test
-  public void testPurgeInactiveTime4() throws Throwable {
-    assert nodeMap.purgeUnusedEntries(4) == 1;
-  }
-  @Test
-  public void testPurgeInactiveTime5() throws Throwable {
-    assert nodeMap.purgeUnusedEntries(5) == 1;
-  }
-
-}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
index 795b48f..340e72d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
@@ -114,12 +114,12 @@
     RoleStatus roleStatus = new RoleStatus(provRole)
 
     //verify it is empty
-    assert roleHistory.findNodesForRelease(role, 1).isEmpty()
+    assert roleHistory.listActiveNodes(role).empty
 
     AMRMClient.ContainerRequest request =
         roleHistory.requestNode(roleStatus, resource);
 
-    List<String> nodes = request.getNodes()
+    List<String> nodes = request.nodes
     assert nodes == null
 
     //pick an idle host
@@ -128,7 +128,7 @@
     //build a container
     MockContainer container = factory.newContainer()
     container.nodeId = new MockNodeId(hostname, 0)
-    container.priority = request.getPriority()
+    container.priority = request.priority
     roleHistory.onContainerAssigned(container);
 
     NodeMap nodemap = roleHistory.cloneNodemap();
@@ -147,10 +147,11 @@
     assert roleEntry.live == 1
 
     // now pick that instance to destroy
+    List<NodeInstance> activeNodes = roleHistory.listActiveNodes(role)
 
-    List<NodeInstance> forRelease = roleHistory.findNodesForRelease(role, 1)
-    assert forRelease.size() == 1
-    NodeInstance target = forRelease[0]
+
+    assert activeNodes.size() == 1
+    NodeInstance target = activeNodes[0]
     assert target == allocated
     roleHistory.onContainerReleaseSubmitted(container);
     assert roleEntry.releasing == 1
@@ -158,19 +159,19 @@
     assert roleEntry.active == 0
 
     // release completed
-    roleHistory.onReleaseCompleted(container)
+    roleHistory.onReleaseCompleted(container, true)
     assert roleEntry.releasing == 0
     assert roleEntry.live == 0
     assert roleEntry.active == 0
 
     // verify it is empty
-    assert roleHistory.findNodesForRelease(role, 1).isEmpty()
+    assert roleHistory.listActiveNodes(role).empty
 
     // ask for a container and expect to get the recently released one
     AMRMClient.ContainerRequest request2 =
         roleHistory.requestNode(roleStatus, resource);
 
-    List<String> nodes2 = request2.getNodes()
+    List<String> nodes2 = request2.nodes
     assert nodes2 != null
     String hostname2 = nodes2[0]
 
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
similarity index 98%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
index dab03f5..79cd348 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestFindNodesForNewInstances.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
@@ -38,7 +38,7 @@
  */
 @Slf4j
 @CompileStatic
-class TestFindNodesForNewInstances extends BaseMockAppStateTest {
+class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
 
   @Override
   String getTestName() {
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
similarity index 97%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
index 77119d5..612cce8 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestNIComparators.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
@@ -26,7 +26,7 @@
 /**
  * Unit test to verify the comparators sort as expected
  */
-class TestNIComparators extends BaseMockAppStateTest  {
+class TestRoleHistoryNIComparators extends BaseMockAppStateTest  {
 
   NodeInstance age1Active4 = nodeInstance(1000, 4, 0, 0)
   NodeInstance age2Active2 = nodeInstance(1001, 2, 0, 0)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
similarity index 96%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
index 8d1f4b0..7085678 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestOutstandingRequestTracker.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
@@ -24,7 +24,7 @@
 import org.apache.slider.server.appmaster.state.OutstandingRequestTracker
 import org.junit.Test
 
-class TestOutstandingRequestTracker extends BaseMockAppStateTest {
+class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest {
 
   NodeInstance host1 = new NodeInstance("host1", 3)
   NodeInstance host2 = new NodeInstance("host2", 3)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
similarity index 99%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
index b646661..4242ba1 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRW.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRW.groovy
@@ -31,8 +31,8 @@
 import org.junit.Test
 
 @Slf4j
-@CompileStatic
-class TestHistoryRW extends BaseMockAppStateTest {
+//@CompileStatic
+class TestRoleHistoryRW extends BaseMockAppStateTest {
 
   static long time = System.currentTimeMillis();
   
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
similarity index 98%
rename from slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy
rename to slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
index 6ec046c..a0663e8 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestHistoryRWOrdering.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRWOrdering.groovy
@@ -33,7 +33,7 @@
 import java.util.regex.Pattern
 
 @Slf4j
-class TestHistoryRWOrdering extends BaseMockAppStateTest {
+class TestRoleHistoryRWOrdering extends BaseMockAppStateTest {
 
   def paths = pathlist(
       [
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 628c729..f96a238 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -30,7 +30,9 @@
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.slider.common.tools.SliderFileSystem
 import org.apache.slider.common.tools.SliderUtils
+import org.apache.slider.core.conf.AggregateConf
 import org.apache.slider.core.main.LauncherExitCodes
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.*
 import org.apache.slider.test.SliderTestBase
 import org.junit.Before
@@ -81,15 +83,31 @@
     appState = new AppState(new MockRecordFactory())
     appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES)
     appState.buildInstance(
-        factory.newInstanceDefinition(0, 0, 0),
+        buildInstanceDefinition(),
+        new Configuration(),
         new Configuration(false),
         factory.ROLES,
         fs,
         historyPath,
-        null, null)
+        null, null,
+        new SimpleReleaseSelector())
   }
 
-  abstract String getTestName();
+  /**
+   * Override point, define the instance definition
+   * @return
+   */
+  public AggregateConf buildInstanceDefinition() {
+    factory.newInstanceDefinition(0, 0, 0)
+  }
+
+  /**
+   * Get the test name ... defaults to method name
+   * @return
+   */
+  String getTestName() {
+    methodName.methodName;
+  }
 
   public RoleStatus getRole0Status() {
     return appState.lookupRoleStatus(ROLE0)
@@ -166,20 +184,72 @@
    * @return a list of roles
    */
   protected List<RoleInstance> createAndStartNodes() {
-    List<RoleInstance> instances = createAndSubmitNodes()
+    return createStartAndStopNodes([])
+  }
+
+  /**
+   * Create, Start and stop nodes
+   * @param completionResults List filled in with the status on all completed nodes
+   * @return the nodes
+   */
+  public List<RoleInstance> createStartAndStopNodes(
+      List<AppState.NodeCompletionResult> completionResults) {
+    List<ContainerId> released = []
+    List<RoleInstance> instances = createAndSubmitNodes(released)
     for (RoleInstance instance : instances) {
       assert appState.onNodeManagerContainerStarted(instance.containerId)
     }
+    releaseContainers(completionResults,
+        released,
+        ContainerState.COMPLETE,
+        "released",
+        0
+    )
     return instances
   }
 
   /**
+   * Release a list of containers, updating the completion results
+   * @param completionResults
+   * @param containerIds
+   * @param containerState
+   * @param exitText
+   * @param containerExitCode
+   * @return
+   */
+  public def releaseContainers(
+      List<AppState.NodeCompletionResult> completionResults,
+      List<ContainerId> containerIds,
+      ContainerState containerState,
+      String exitText,
+      int containerExitCode) {
+    containerIds.each { ContainerId id ->
+      ContainerStatus status = ContainerStatus.newInstance(id,
+          containerState,
+          exitText,
+          containerExitCode)
+      completionResults << appState.onCompletedNode(status)
+
+    }
+  }
+
+  /**
    * Create nodes and submit them
    * @return a list of roles
    */
   public List<RoleInstance> createAndSubmitNodes() {
+    return createAndSubmitNodes([])
+  }
+
+  /**
+   * Create nodes and submit them
+   * @param released a list that is built up of all released nodes
+   * @return a list of roles allocated
+   */
+  public List<RoleInstance> createAndSubmitNodes(
+      List<ContainerId> released) {
     List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
-    List<Container> allocatedContainers = engine.execute(ops)
+    List<Container> allocatedContainers = engine.execute(ops, released)
     List<ContainerAssignment> assignments = [];
     List<AbstractRMOperation> operations = []
     appState.onContainersAllocated(allocatedContainers, assignments, operations)
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
index 25bee36..3eba7c4 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockContainer.groovy
@@ -20,7 +20,7 @@
 
 import org.apache.hadoop.yarn.api.records.*
 
-class MockContainer extends Container{
+class MockContainer extends Container {
   
   ContainerId id;
   NodeId nodeId
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
index 7b73451..6db1ac5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
@@ -18,11 +18,15 @@
 
 package org.apache.slider.server.appmaster.model.mock
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.service.LifecycleEvent
 import org.apache.hadoop.service.ServiceStateChangeListener
 import org.apache.hadoop.yarn.api.records.Container
+import org.apache.hadoop.yarn.api.records.ContainerId
+import org.apache.hadoop.yarn.client.api.AMRMClient
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.common.tools.SliderFileSystem
 import org.apache.slider.core.conf.AggregateConf
@@ -33,7 +37,9 @@
 import org.apache.slider.core.registry.info.ServiceInstanceData
 import org.apache.slider.providers.ProviderRole
 import org.apache.slider.providers.ProviderService
-import org.apache.slider.server.appmaster.AMViewForProviders
+import org.apache.slider.server.appmaster.actions.QueueAccess
+import org.apache.slider.server.appmaster.state.ContainerReleaseSelector
+import org.apache.slider.server.appmaster.state.MostRecentContainerReleaseSelector
 import org.apache.slider.server.appmaster.state.StateAccessForProviders
 import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations
 import org.apache.slider.server.appmaster.web.rest.agent.HeartBeat
@@ -63,7 +69,8 @@
   }
 
   @Override
-  public void validateInstanceDefinition(AggregateConf instanceDefinition) throws SliderException {
+  public void validateInstanceDefinition(AggregateConf instanceDefinition)
+  throws SliderException {
   }
 
   @Override
@@ -99,7 +106,7 @@
     return null
   }
 
-    @Override
+  @Override
   public long getStartTime() {
     return 0;
   }
@@ -130,7 +137,7 @@
   }
 
   @Override
-  public Map<String,String> getBlockers() {
+  public Map<String, String> getBlockers() {
     return null;
   }
 
@@ -155,11 +162,17 @@
   }
 
   @Override
-  public Configuration loadProviderConfigurationInformation(File confDir) throws BadCommandArgumentsException, IOException {
+  public Configuration loadProviderConfigurationInformation(File confDir)
+  throws BadCommandArgumentsException, IOException {
     return null;
   }
 
   @Override
+  void initializeApplicationConfiguration(AggregateConf instanceDefinition,
+      SliderFileSystem fileSystem) throws IOException, SliderException {
+  }
+
+  @Override
   public void validateApplicationConfiguration(
       AggregateConf instanceDefinition,
       File confDir,
@@ -168,7 +181,7 @@
 
 
   @Override
-  public Map<String,String> buildProviderStatus() {
+  public Map<String, String> buildProviderStatus() {
     return null;
   }
 
@@ -187,7 +200,8 @@
   }
 
   @Override
-  public Map<String, String> buildMonitorDetails(ClusterDescription clusterSpec) {
+  public Map<String, String> buildMonitorDetails(
+      ClusterDescription clusterSpec) {
     return null;
   }
 
@@ -195,28 +209,29 @@
   void bind(
       StateAccessForProviders stateAccessor,
       RegistryViewForProviders registry,
-      AMViewForProviders amView) {
+      QueueAccess queueAccess,
+      List<Container> liveContainers) {
 
   }
 
   @Override
-    AgentRestOperations getAgentRestOperations() {
-        return new AgentRestOperations() {
-            @Override
-            public RegistrationResponse handleRegistration(Register registration) {
-                // dummy impl
-                RegistrationResponse response = new RegistrationResponse();
-                response.setResponseStatus(RegistrationStatus.OK);
-                return response;
-            }
+  AgentRestOperations getAgentRestOperations() {
+    return new AgentRestOperations() {
+      @Override
+      public RegistrationResponse handleRegistration(Register registration) {
+        // dummy impl
+        RegistrationResponse response = new RegistrationResponse();
+        response.setResponseStatus(RegistrationStatus.OK);
+        return response;
+      }
 
-            @Override
-            public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
-                // dummy impl
-                return new HeartBeatResponse();
-            }
-        }
+      @Override
+      public HeartBeatResponse handleHeartBeat(HeartBeat heartBeat) {
+        // dummy impl
+        return new HeartBeatResponse();
+      }
     }
+  }
 
   @Override
   void buildEndpointDetails(Map<String, String> details) {
@@ -225,8 +240,34 @@
 
   @Override
   void applyInitialRegistryDefinitions(
-          URL unsecureWebAPI, URL secureWebAPI, ServiceInstanceData registryInstanceData)
+      URL unsecureWebAPI,
+      URL secureWebAPI,
+      ServiceInstanceData registryInstanceData)
   throws MalformedURLException, IOException {
 
   }
+
+  @Override
+  public void notifyContainerCompleted(ContainerId containerId) {
+  }
+
+  @Override
+  ContainerReleaseSelector createContainerReleaseSelector() {
+    return new MostRecentContainerReleaseSelector()
+  }
+
+  @Override
+  public void releaseAssignedContainer(ContainerId containerId) {
+    // no-op
+  }
+
+  @Override
+  public void addContainerRequest(AMRMClient.ContainerRequest req) {
+    // no-op
+  }
+
+  @Override
+  void rebuildContainerDetails(List<Container> liveContainers, String applicationId,
+      Map<Integer, ProviderRole> roleProviderMap) {
+  }
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
index 10a7708..0fdba6b 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
@@ -21,25 +21,28 @@
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.ContainerId
 import org.apache.hadoop.yarn.client.api.AMRMClient
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
-import org.apache.slider.server.appmaster.state.RMOperationHandler
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.RMOperationHandler
 
 @Slf4j
 class MockRMOperationHandler extends RMOperationHandler {
   public List<AbstractRMOperation> operations = [];
-  
+  int requests, releases;
+
   @Override
   public void releaseAssignedContainer(ContainerId containerId) {
     operations.add(new ContainerReleaseOperation(containerId))
     log.info("Releasing container ID " + containerId.getId())
+    releases++;
   }
 
   @Override
   public void addContainerRequest(AMRMClient.ContainerRequest req) {
     operations.add(new ContainerRequestOperation(req))
     log.info("Requesting container role #" + req.priority);
+    requests++;
   }
 
   /**
@@ -47,5 +50,7 @@
    */
   public void clear() {
     operations.clear()
+    releases = 0;
+    requests = 0;
   }
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
index 7ebdf52..f405188 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockYarnEngine.groovy
@@ -25,9 +25,9 @@
 import org.apache.hadoop.yarn.api.records.Container
 import org.apache.hadoop.yarn.api.records.ContainerId
 import org.apache.hadoop.yarn.client.api.AMRMClient
-import org.apache.slider.server.appmaster.state.AbstractRMOperation
-import org.apache.slider.server.appmaster.state.ContainerReleaseOperation
-import org.apache.slider.server.appmaster.state.ContainerRequestOperation
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation
 
 /**
  * This is an evolving engine to mock YARN operations
@@ -86,9 +86,7 @@
    * @param ops
    * @return
    */
-  List<Container> execute(
-      List<AbstractRMOperation> ops
-      ) {
+  List<Container> execute(List<AbstractRMOperation> ops) {
     return execute(ops, [])
   }
 
@@ -106,7 +104,7 @@
       if (op instanceof ContainerReleaseOperation) {
         ContainerReleaseOperation cro = (ContainerReleaseOperation) op
         ContainerId cid = cro.containerId
-        releaseContainer(cid);
+        assert releaseContainer(cid);
         released.add(cid)
       } else {
         ContainerRequestOperation req = (ContainerRequestOperation) op
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
new file mode 100644
index 0000000..c789011
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.model.monkey
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.server.appmaster.actions.ActionHalt
+import org.apache.slider.server.appmaster.actions.ActionKillContainer
+import org.apache.slider.server.appmaster.actions.QueueService
+import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import org.apache.slider.server.appmaster.model.mock.MockRMOperationHandler
+import org.apache.slider.server.appmaster.monkey.ChaosKillAM
+import org.apache.slider.server.appmaster.monkey.ChaosKillContainer
+import org.apache.slider.server.appmaster.monkey.ChaosMonkeyService
+import org.apache.slider.server.appmaster.monkey.ChaosTarget
+import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation
+import org.apache.slider.server.appmaster.state.RoleInstance
+import org.junit.Before
+import org.junit.Test
+
+@CompileStatic
+@Slf4j
+class TestMockMonkey extends BaseMockAppStateTest {
+
+  /**
+   * This queue service is NOT started; tests need to poll the queue
+   * rather than expect them to execute
+   */
+  QueueService queues = new QueueService();
+  ChaosMonkeyService monkey = new ChaosMonkeyService(metricRegistry,
+  queues)
+
+  @Before
+  public void init() {
+    def configuration = new YarnConfiguration()
+    queues.init(configuration)
+    monkey.init(configuration)
+  }
+  
+  @Test
+  public void testMonkeyStart() throws Throwable {
+    monkey.start()
+    monkey.stop()
+  }
+
+
+  @Test
+  public void testMonkeyPlay() throws Throwable {
+    ChaosCounter counter = new ChaosCounter()
+    monkey.addTarget("target", counter, ChaosMonkeyService.PERCENT_100)
+    
+    monkey.play()
+    assert counter.count == 1
+  }
+
+  @Test
+  public void testMonkeyPlaySometimes() throws Throwable {
+    ChaosCounter counter = new ChaosCounter()
+    ChaosCounter counter2 = new ChaosCounter()
+    monkey.addTarget("target1", counter, ChaosMonkeyService.PERCENT_1 * 50)
+    monkey.addTarget("target2", counter2, ChaosMonkeyService.PERCENT_1 * 25)
+
+    for (int i = 0; i < 100; i++) {
+      monkey.play()
+    }
+    log.info("Counter1 = ${counter.count} counter2 = ${counter2.count}")
+    /*
+     * Relying on probability here to give approximate answers 
+     */
+    assert counter.count > 25 
+    assert counter.count < 75 
+    assert counter2.count < counter.count 
+  }
+
+  @Test
+  public void testAMKiller() throws Throwable {
+
+    def chaos = new ChaosKillAM(queues, -1)
+    chaos.chaosAction();
+    assert queues.scheduledActions.size() == 1
+    def action = queues.scheduledActions.take()
+    assert action instanceof ActionHalt
+  }
+  
+  
+  @Test
+  public void testContainerKillerEmptyApp() throws Throwable {
+
+    
+    def chaos = new ChaosKillContainer(appState,
+        queues,
+        new MockRMOperationHandler())
+    chaos.chaosAction();
+    assert queues.scheduledActions.size() == 0
+  }
+  
+   
+  
+  @Test
+  public void testContainerKiller() throws Throwable {
+    MockRMOperationHandler ops = new MockRMOperationHandler();
+    role0Status.desired = 1
+    List<RoleInstance> instances = createAndStartNodes()
+    assert instances.size() == 1
+    def instance = instances[0]
+    
+    def chaos = new ChaosKillContainer(appState, queues, ops)
+    chaos.chaosAction();
+    assert queues.scheduledActions.size() == 1
+    def action = queues.scheduledActions.take()
+    ActionKillContainer killer = (ActionKillContainer) action
+    assert killer.containerId == instance.containerId;
+    killer.execute(null, queues, appState)
+    assert ops.releases == 1;
+
+    ContainerReleaseOperation operation = (ContainerReleaseOperation) ops.operations[0]
+    assert operation.containerId == instance.containerId
+  }
+  
+  
+
+  /**
+   * Chaos target that just implement a counter
+   */
+  private static class ChaosCounter implements ChaosTarget {
+    int count;
+    
+    @Override
+    void chaosAction() {
+      count++;
+    }
+
+
+    @Override
+    public String toString() {
+      final StringBuilder sb = new StringBuilder(
+          "ChaosCounter{");
+      sb.append("count=").append(count);
+      sb.append('}');
+      return sb.toString();
+    }
+  }
+}
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
index cd7c9d8..c1732e6 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
@@ -47,9 +47,8 @@
 
   @Test
   public void testRestURIs() throws Throwable {
-    def clustername = "test_publisherws"
-    createMiniCluster(
-        clustername,
+    String clustername = createMiniCluster(
+        "",
         configuration,
         1,
         1,
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
index d632b25..28b484f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy
@@ -18,8 +18,16 @@
 
 package org.apache.slider.test
 
+import com.codahale.metrics.MetricRegistry
 import groovy.transform.CompileStatic
+import org.apache.hadoop.fs.FileUtil
+import org.apache.slider.common.SliderXMLConfKeysForTesting
 import org.junit.Before
+import org.junit.BeforeClass
+import org.junit.Rule
+import org.junit.rules.TestName
+
+import java.nio.file.Files
 
 /**
  * Base class for unit tests as well as ones starting mini clusters
@@ -31,10 +39,25 @@
 @CompileStatic
 public abstract class SliderTestBase extends SliderTestUtils {
 
+  /**
+   * Singleton metric registry
+   */
+  public static final MetricRegistry metricRegistry = new MetricRegistry()
+  
+  @Rule
+  public TestName methodName = new TestName();
+
+  @BeforeClass
+  public static void nameThread() {
+    Thread.currentThread().setName("JUnit");
+  }
+
   @Before
   public void setup() {
-    //give our thread a name
-    Thread.currentThread().name = "JUnit"
+    FileUtil.fullyDelete(new File(SliderXMLConfKeysForTesting.TEST_SECURITY_DIR))
   }
 
+  
+  
+
 }
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
index a250e55..3fc3e55 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
@@ -205,7 +205,7 @@
   public static void waitUntilClusterLive(SliderClient client, int timeout) {
     Duration duration = new Duration(timeout);
     duration.start()
-    while (!client.actionExists(client.deployedClusterName, true) &&
+    while (0 != client.actionExists(client.deployedClusterName, true) &&
            !duration.limitExceeded) {
       sleep(1000);
     }
@@ -480,6 +480,8 @@
       List args) {
     ServiceLauncher<SliderClient> serviceLauncher =
         new ServiceLauncher<SliderClient>(SliderClient.name);
+
+    log.debug("slider ${SliderUtils.join(args, " ", false)}")
     serviceLauncher.launchService(conf,
         toArray(args),
         false);
@@ -492,6 +494,8 @@
       Throwable {
     ServiceLauncher serviceLauncher =
         new ServiceLauncher(serviceClass.name);
+    log.debug("slider ${SliderUtils.join(args, " ", false)}")
+
     serviceLauncher.launchService(conf,
         toArray(args),
         false);
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
index 2c1b270..b6f863b 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
@@ -136,13 +136,8 @@
     }
   } 
 
-
-  @Rule
-  public TestName methodName = new TestName();
-
-  @Before
-  public void nameThread() {
-    Thread.currentThread().setName("JUnit");
+  protected String buildClustername(String clustername) {
+    return clustername ?: createClusterName()
   }
 
   /**
@@ -152,7 +147,7 @@
    */
   protected String createClusterName() {
     def base = methodName.getMethodName().toLowerCase(Locale.ENGLISH)
-    if (clusterCount++>1) {
+    if (clusterCount++ > 1) {
       base += "-$clusterCount"
     }
     return base
@@ -162,7 +157,7 @@
   @Override
   void setup() {
     super.setup()
-    def testConf = getTestConfiguration();
+    def testConf = testConfiguration;
     thawWaitTime = getTimeOptionMillis(testConf,
         KEY_TEST_THAW_WAIT_TIME,
         thawWaitTime)
@@ -230,15 +225,16 @@
 
   /**
    * Create and start a minicluster
-   * @param name cluster/test name
+   * @param name cluster/test name; if empty one is created from the junit method
    * @param conf configuration to use
    * @param noOfNodeManagers #of NMs
    * @param numLocalDirs #of local dirs
    * @param numLogDirs #of log dirs
    * @param startZK create a ZK micro cluster
    * @param startHDFS create an HDFS mini cluster
+   * @return the name of the cluster
    */
-  protected void createMiniCluster(String name,
+  protected String createMiniCluster(String name,
                                    YarnConfiguration conf,
                                    int noOfNodeManagers,
                                    int numLocalDirs,
@@ -247,12 +243,14 @@
     conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 64);
     conf.set(YarnConfiguration.RM_SCHEDULER, FIFO_SCHEDULER);
     SliderUtils.patchConfiguration(conf)
+    name = buildClustername(name)
     miniCluster = new MiniYARNCluster(name, noOfNodeManagers, numLocalDirs, numLogDirs)
     miniCluster.init(conf)
     miniCluster.start();
     if (startHDFS) {
       createMiniHDFSCluster(name, conf)
     }
+    return name
   }
 
   /**
@@ -441,12 +439,16 @@
    * @param clusterOps map of key=value cluster options to set with the --option arg
    * @return launcher which will have executed the command.
    */
-  public ServiceLauncher<SliderClient> createOrBuildCluster(String action, String clustername, Map<String, Integer> roles, List<String> extraArgs, boolean deleteExistingData, boolean blockUntilRunning, Map<String, String> clusterOps) {
+  public ServiceLauncher<SliderClient> createOrBuildCluster(String action, String clustername,
+    Map<String, Integer> roles, List<String> extraArgs, boolean deleteExistingData,
+    boolean blockUntilRunning, Map<String, String> clusterOps) {
     assert clustername != null
     assert miniCluster != null
-    if (deleteExistingData) {
-      HadoopFS dfs = HadoopFS.get(new URI(fsDefaultName), miniCluster.config)
-      Path clusterDir = new SliderFileSystem(dfs, miniCluster.config).buildClusterDirPath(clustername)
+    // update action should keep existing data
+    def config = miniCluster.config
+    if (deleteExistingData && !SliderActions.ACTION_UPDATE.equals(action)) {
+      HadoopFS dfs = HadoopFS.get(new URI(fsDefaultName), config)
+      Path clusterDir = new SliderFileSystem(dfs, config).buildClusterDirPath(clustername)
       log.info("deleting customer data at $clusterDir")
       //this is a safety check to stop us doing something stupid like deleting /
       assert clusterDir.toString().contains("/.slider/")
@@ -485,7 +487,7 @@
     }
     ServiceLauncher<SliderClient> launcher = launchClientAgainstMiniMR(
         //config includes RM binding info
-        new YarnConfiguration(miniCluster.config),
+        new YarnConfiguration(config),
         //varargs list of command line params
         argsList
     )
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
index 77d47f4..0259fb7 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/YarnZKMiniClusterTestBase.groovy
@@ -95,19 +95,20 @@
    * @param startZK create a ZK micro cluster
    * @param startHDFS create an HDFS mini cluster
    */
-  protected void createMiniCluster(String name,
+  protected String createMiniCluster(String name,
                                    YarnConfiguration conf,
                                    int noOfNodeManagers,
                                    int numLocalDirs,
                                    int numLogDirs,
                                    boolean startZK,
                                    boolean startHDFS) {
-    createMiniCluster(name, conf, noOfNodeManagers, numLocalDirs, numLogDirs,
+    name = createMiniCluster(name, conf, noOfNodeManagers, numLocalDirs, numLogDirs,
         startHDFS)
 
     if (startZK) {
       createMicroZKCluster(conf)
     }
+    return name
   }
 
   /**
@@ -117,11 +118,24 @@
    * @param noOfNodeManagers #of NMs
    * @param startZK create a ZK micro cluster
    */
-  protected void createMiniCluster(String name,
+  protected String createMiniCluster(String name,
                                    YarnConfiguration conf,
                                    int noOfNodeManagers,
                                    boolean startZK) {
-    createMiniCluster(name, conf, noOfNodeManagers, 1, 1, startZK, false)
+    return createMiniCluster(name, conf, noOfNodeManagers, 1, 1, startZK, false)
+  }
+
+  /**
+   * Create and start a minicluster with the name from the test method
+   * @param name cluster/test name
+   * @param conf configuration to use
+   * @param noOfNodeManagers #of NMs
+   * @param startZK create a ZK micro cluster
+   */
+  protected String createMiniCluster(YarnConfiguration conf,
+      int noOfNodeManagers,
+      boolean startZK) {
+    return createMiniCluster("", conf, noOfNodeManagers, 1, 1, startZK, false)
   }
 
   public void createMicroZKCluster(Configuration conf) {
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
index c10b60a..6ed950f 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestAgentProviderService.java
@@ -18,24 +18,21 @@
 
 package org.apache.slider.providers.agent;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.api.records.Token;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.ClusterDescriptionKeys;
 import org.apache.slider.api.ClusterNode;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.OptionKeys;
-import org.apache.slider.api.StatusKeys;
+import org.apache.slider.common.SliderXmlConfKeys;
 import org.apache.slider.common.tools.SliderFileSystem;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.conf.ConfTree;
@@ -46,6 +43,7 @@
 import org.apache.slider.providers.agent.application.metadata.Application;
 import org.apache.slider.providers.agent.application.metadata.CommandOrder;
 import org.apache.slider.providers.agent.application.metadata.Component;
+import org.apache.slider.providers.agent.application.metadata.ComponentExport;
 import org.apache.slider.providers.agent.application.metadata.Export;
 import org.apache.slider.providers.agent.application.metadata.ExportGroup;
 import org.apache.slider.providers.agent.application.metadata.Metainfo;
@@ -57,6 +55,7 @@
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.appmaster.web.rest.agent.CommandReport;
 import org.apache.slider.server.appmaster.web.rest.agent.ComponentStatus;
+import org.apache.slider.server.appmaster.web.rest.agent.ExecutionCommand;
 import org.apache.slider.server.appmaster.web.rest.agent.HeartBeat;
 import org.apache.slider.server.appmaster.web.rest.agent.HeartBeatResponse;
 import org.apache.slider.server.appmaster.web.rest.agent.Register;
@@ -65,6 +64,7 @@
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
+import org.mockito.Matchers;
 import org.mockito.Mockito;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -80,12 +80,15 @@
 import java.util.Map;
 import java.util.Set;
 
+import static org.easymock.EasyMock.anyBoolean;
 import static org.easymock.EasyMock.anyObject;
 import static org.easymock.EasyMock.createNiceMock;
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.replay;
+import static org.junit.Assert.assertEquals;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyCollection;
+import static org.mockito.Matchers.anyMap;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Mockito.doNothing;
 import static org.mockito.Mockito.doReturn;
@@ -105,6 +108,7 @@
                                                + "      <version>0.96.0.2.1.1</version>\n"
                                                + "      <type>YARN-APP</type>\n"
                                                + "      <minHadoopVersion>2.1.0</minHadoopVersion>\n"
+                                               + "      <exportedConfigs>hbase-site,global</exportedConfigs>\n"
                                                + "      <exportGroups>\n"
                                                + "        <exportGroup>\n"
                                                + "          <name>QuickLinks</name>\n"
@@ -132,8 +136,20 @@
                                                + "      </commandOrders>\n"
                                                + "      <components>\n"
                                                + "        <component>\n"
+                                               + "          <name>HBASE_REST</name>\n"
+                                               + "          <category>MASTER</category>\n"
+                                               + "          <commandScript>\n"
+                                               + "            <script>scripts/hbase_rest.py</script>\n"
+                                               + "            <scriptType>PYTHON</scriptType>\n"
+                                               + "            <timeout>600</timeout>\n"
+                                               + "          </commandScript>\n"
+                                               + "        </component>\n"
+                                               + "        <component>\n"
                                                + "          <name>HBASE_MASTER</name>\n"
                                                + "          <category>MASTER</category>\n"
+                                               + "          <publishConfig>true</publishConfig>\n"
+                                               + "          <autoStartOnFailure>true</autoStartOnFailure>\n"
+                                               + "          <appExports>QuickLinks-JMX_Endpoint,QuickLinks-Master_Status</appExports>\n"
                                                + "          <minInstanceCount>1</minInstanceCount>\n"
                                                + "          <maxInstanceCount>2</maxInstanceCount>\n"
                                                + "          <commandScript>\n"
@@ -146,10 +162,21 @@
                                                + "          <name>HBASE_REGIONSERVER</name>\n"
                                                + "          <category>SLAVE</category>\n"
                                                + "          <minInstanceCount>1</minInstanceCount>\n"
+                                               + "          <autoStartOnFailure>Falsee</autoStartOnFailure>\n"
                                                + "          <commandScript>\n"
                                                + "            <script>scripts/hbase_regionserver.py</script>\n"
                                                + "            <scriptType>PYTHON</scriptType>\n"
                                                + "          </commandScript>\n"
+                                               + "          <componentExports>\n"
+                                               + "            <componentExport>\n"
+                                               + "              <name>PropertyA</name>\n"
+                                               + "              <value>${THIS_HOST}:${site.global.listen_port}</value>\n"
+                                               + "            </componentExport>\n"
+                                               + "            <componentExport>\n"
+                                               + "              <name>PropertyB</name>\n"
+                                               + "              <value>AConstant</value>\n"
+                                               + "            </componentExport>\n"
+                                               + "          </componentExports>\n"
                                                + "        </component>\n"
                                                + "      </components>\n"
                                                + "      <osSpecifics>\n"
@@ -205,7 +232,7 @@
   public void testRegistration() throws IOException {
 
     ConfTree tree = new ConfTree();
-    tree.global.put(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
+    tree.global.put(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
 
     AgentProviderService aps = new AgentProviderService();
     ContainerLaunchContext ctx = createNiceMock(ContainerLaunchContext.class);
@@ -242,6 +269,10 @@
     metainfo.setApplication(new Application());
     doReturn(metainfo).when(mockAps).getApplicationMetainfo(any(SliderFileSystem.class), anyString());
 
+    Configuration conf = new Configuration();
+    conf.set(SliderXmlConfKeys.REGISTRY_PATH,
+        SliderXmlConfKeys.DEFAULT_REGISTRY_PATH);
+
     try {
       doReturn(true).when(mockAps).isMaster(anyString());
       doNothing().when(mockAps).addInstallCommand(
@@ -249,21 +280,28 @@
           eq("mockcontainer_1"),
           any(HeartBeatResponse.class),
           eq("scripts/hbase_master.py"));
+      doReturn(conf).when(mockAps).getConfig();
     } catch (SliderException e) {
     }
 
+    doNothing().when(mockAps).processAllocatedPorts(
+        anyString(),
+        anyString(),
+        anyString(),
+        anyMap()
+    );
     expect(access.isApplicationLive()).andReturn(true).anyTimes();
     ClusterDescription desc = new ClusterDescription();
-    desc.setInfo(StatusKeys.INFO_AM_HOSTNAME, "host1");
-    desc.setInfo(StatusKeys.INFO_AM_AGENT_PORT, "8088");
-    desc.setInfo(StatusKeys.INFO_AM_SECURED_AGENT_PORT, "8089");
+    desc.setOption(OptionKeys.ZOOKEEPER_QUORUM, "host1:2181");
     desc.setInfo(OptionKeys.APPLICATION_NAME, "HBASE");
     expect(access.getClusterStatus()).andReturn(desc).anyTimes();
 
     AggregateConf aggConf = new AggregateConf();
     ConfTreeOperations treeOps = aggConf.getAppConfOperations();
     treeOps.getOrAddComponent("HBASE_MASTER").put(AgentKeys.WAIT_HEARTBEAT, "0");
+    treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
     expect(access.getInstanceDefinitionSnapshot()).andReturn(aggConf);
+    expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
     replay(access, ctx, container, sliderFileSystem);
 
     try {
@@ -276,17 +314,30 @@
                                           resourceComponent,
                                           appComponent,
                                           containerTmpDirPath);
-    } catch (SliderException | IOException he) {
+      // JDK7
+    } catch (IOException he) {
+      log.warn("{}", he, he);
+    } catch (SliderException he) {
       log.warn("{}", he, he);
     }
 
     Register reg = new Register();
     reg.setResponseId(0);
     reg.setHostname("mockcontainer_1___HBASE_MASTER");
+    Map<String,String> ports = new HashMap();
+    ports.put("a","100");
+    reg.setAllocatedPorts(ports);
     RegistrationResponse resp = mockAps.handleRegistration(reg);
     Assert.assertEquals(0, resp.getResponseId());
     Assert.assertEquals(RegistrationStatus.OK, resp.getResponseStatus());
 
+    Mockito.verify(mockAps, Mockito.times(1)).processAllocatedPorts(
+        anyString(),
+        anyString(),
+        anyString(),
+        anyMap()
+    );
+
     HeartBeat hb = new HeartBeat();
     hb.setResponseId(1);
     hb.setHostname("mockcontainer_1___HBASE_MASTER");
@@ -302,16 +353,16 @@
       public ClusterDescription getClusterStatus() {
         ClusterDescription cd = new ClusterDescription();
         cd.status = new HashMap<String, Object>();
-        Map<String, Map<String, ClusterNode>> roleMap = new HashMap<>();
-        ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+        Map<String, Map<String, ClusterNode>> roleMap = new HashMap<String, Map<String, ClusterNode>>();
+        ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
         cn1.host = "FIRST_HOST";
-        Map<String, ClusterNode> map1 = new HashMap<>();
+        Map<String, ClusterNode> map1 = new HashMap<String, ClusterNode>();
         map1.put("FIRST_CONTAINER", cn1);
-        ClusterNode cn2 = new ClusterNode(new MyContainerId(2));
+        ClusterNode cn2 = new ClusterNode(new MockContainerId(2));
         cn2.host = "SECOND_HOST";
-        Map<String, ClusterNode> map2 = new HashMap<>();
+        Map<String, ClusterNode> map2 = new HashMap<String, ClusterNode>();
         map2.put("SECOND_CONTAINER", cn2);
-        ClusterNode cn3 = new ClusterNode(new MyContainerId(3));
+        ClusterNode cn3 = new ClusterNode(new MockContainerId(3));
         cn3.host = "THIRD_HOST";
         map2.put("THIRD_CONTAINER", cn3);
 
@@ -335,7 +386,7 @@
     };
 
     aps.setAmState(appState);
-    Map<String, String> tokens = new HashMap<>();
+    Map<String, String> tokens = new HashMap<String, String>();
     aps.addRoleRelatedTokens(tokens);
     Assert.assertEquals(2, tokens.size());
     Assert.assertEquals("FIRST_HOST", tokens.get("${FIRST_ROLE_HOST}"));
@@ -344,6 +395,45 @@
   }
 
   @Test
+  public void testComponentSpecificPublishes() throws Exception {
+    InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
+    Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
+    AgentProviderService aps = new AgentProviderService();
+    AgentProviderService mockAps = Mockito.spy(aps);
+    doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
+    doReturn(metainfo).when(mockAps).getMetainfo();
+
+    Map<String, String> ports = new HashMap<String, String>();
+    ports.put("global.listen_port", "10010");
+    mockAps.processAndPublishComponentSpecificData(ports,
+                                                   "cid1",
+                                                   "host1",
+                                                   "HBASE_REGIONSERVER");
+    ArgumentCaptor<Collection> entriesCaptor = ArgumentCaptor.
+        forClass(Collection.class);
+    ArgumentCaptor<String> publishNameCaptor = ArgumentCaptor.
+        forClass(String.class);
+    Mockito.verify(mockAps, Mockito.times(1)).publishApplicationInstanceData(
+        anyString(),
+        publishNameCaptor.capture(),
+        entriesCaptor.capture());
+    assert entriesCaptor.getAllValues().size() == 1;
+    for (Collection coll : entriesCaptor.getAllValues()) {
+      Set<Map.Entry<String, String>> entrySet = (Set<Map.Entry<String, String>>) coll;
+      for (Map.Entry entry : entrySet) {
+        log.info("{}:{}", entry.getKey(), entry.getValue().toString());
+        if (entry.getKey().equals("PropertyA")) {
+          assert entry.getValue().toString().equals("host1:10010");
+        }
+      }
+    }
+    assert publishNameCaptor.getAllValues().size() == 1;
+    for (String coll : publishNameCaptor.getAllValues()) {
+      assert coll.equals("ComponentInstanceData");
+    }
+  }
+
+  @Test
   public void testProcessConfig() throws Exception {
     InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
     Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
@@ -354,38 +444,40 @@
     status.setClusterName("test");
     status.setComponentName("HBASE_MASTER");
     status.setRoleCommand("GET_CONFIG");
-    Map<String, String> hbaseSite = new HashMap<>();
+    Map<String, String> hbaseSite = new HashMap<String, String>();
     hbaseSite.put("hbase.master.info.port", "60012");
     hbaseSite.put("c", "d");
-    Map<String, Map<String, String>> configs = new HashMap<>();
+    Map<String, Map<String, String>> configs = 
+        new HashMap<String, Map<String, String>>();
     configs.put("hbase-site", hbaseSite);
     configs.put("global", hbaseSite);
     status.setConfigs(configs);
-    hb.setComponentStatus(new ArrayList<>(Arrays.asList(status)));
+    hb.setComponentStatus(new ArrayList<ComponentStatus>(Arrays.asList(status)));
 
-    Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<>();
-    Map<String, ClusterNode> container = new HashMap<>();
-    ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+    Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+    Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+    ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
     cn1.host = "HOST1";
     container.put("cid1", cn1);
     roleClusterNodeMap.put("HBASE_MASTER", container);
 
-    ComponentInstanceState componentStatus = new ComponentInstanceState("HBASE_MASTER", "aid", "cid");
+    ComponentInstanceState componentStatus = new ComponentInstanceState("HBASE_MASTER", 
+        new MockContainerId(1), "cid");
     AgentProviderService mockAps = Mockito.spy(aps);
-    doNothing().when(mockAps).publishComponentConfiguration(anyString(), anyString(), anyCollection());
+    doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
     doReturn(metainfo).when(mockAps).getMetainfo();
     doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
 
-    mockAps.processReturnedStatus(hb, componentStatus);
-    assert componentStatus.getConfigReported() == true;
-    ArgumentCaptor<Collection> commandCaptor = ArgumentCaptor.
+    mockAps.publishConfigAndExportGroups(hb, componentStatus, "HBASE_MASTER");
+    Assert.assertTrue(componentStatus.getConfigReported());
+    ArgumentCaptor<Collection> entriesCaptor = ArgumentCaptor.
         forClass(Collection.class);
-    Mockito.verify(mockAps, Mockito.times(3)).publishComponentConfiguration(
+    Mockito.verify(mockAps, Mockito.times(3)).publishApplicationInstanceData(
         anyString(),
         anyString(),
-        commandCaptor.capture());
-    assert commandCaptor.getAllValues().size() == 3;
-    for (Collection coll : commandCaptor.getAllValues()) {
+        entriesCaptor.capture());
+    Assert.assertEquals(3, entriesCaptor.getAllValues().size());
+    for (Collection coll : entriesCaptor.getAllValues()) {
       Set<Map.Entry<String, String>> entrySet = (Set<Map.Entry<String, String>>) coll;
       for (Map.Entry entry : entrySet) {
         log.info("{}:{}", entry.getKey(), entry.getValue().toString());
@@ -394,6 +486,16 @@
         }
       }
     }
+
+    Map<String, String> exports = mockAps.getCurrentExports("QuickLinks");
+    Assert.assertEquals(2, exports.size());
+    Assert.assertEquals(exports.get("JMX_Endpoint"), "http://HOST1:60012/jmx");
+
+    mockAps.publishConfigAndExportGroups(hb, componentStatus, "HBASE_REST");
+    Mockito.verify(mockAps, Mockito.times(3)).publishApplicationInstanceData(
+        anyString(),
+        anyString(),
+        entriesCaptor.capture());
   }
 
   @Test
@@ -404,32 +506,47 @@
     Application application = metainfo.getApplication();
     log.info("Service: " + application.toString());
     Assert.assertEquals(application.getName(), "HBASE");
-    Assert.assertEquals(application.getComponents().size(), 2);
+    Assert.assertEquals(application.getExportedConfigs(), "hbase-site,global");
+    Assert.assertEquals(application.getComponents().size(), 3);
     List<Component> components = application.getComponents();
     int found = 0;
     for (Component component : components) {
       if (component.getName().equals("HBASE_MASTER")) {
+        Assert.assertEquals(component.getAutoStartOnFailure(), "true");
+        Assert.assertEquals(component.getRequiresAutoRestart(), Boolean.TRUE);
         Assert.assertEquals(component.getMinInstanceCount(), "1");
         Assert.assertEquals(component.getMaxInstanceCount(), "2");
         Assert.assertEquals(component.getCommandScript().getScript(), "scripts/hbase_master.py");
         Assert.assertEquals(component.getCategory(), "MASTER");
+        Assert.assertEquals(component.getComponentExports().size(), 0);
+        Assert.assertEquals(component.getAppExports(), "QuickLinks-JMX_Endpoint,QuickLinks-Master_Status");
         found++;
       }
       if (component.getName().equals("HBASE_REGIONSERVER")) {
+        Assert.assertEquals(component.getAutoStartOnFailure(), "Falsee");
+        Assert.assertEquals(component.getRequiresAutoRestart(), Boolean.FALSE);
         Assert.assertEquals(component.getMinInstanceCount(), "1");
         Assert.assertNull(component.getMaxInstanceCount());
         Assert.assertEquals(component.getCommandScript().getScript(), "scripts/hbase_regionserver.py");
         Assert.assertEquals(component.getCategory(), "SLAVE");
+        Assert.assertEquals(component.getComponentExports().size(), 2);
+        List<ComponentExport> es = component.getComponentExports();
+        ComponentExport e = es.get(0);
+        Assert.assertEquals(e.getName(), "PropertyA");
+        Assert.assertEquals(e.getValue(), "${THIS_HOST}:${site.global.listen_port}");
+        e = es.get(1);
+        Assert.assertEquals(e.getName(), "PropertyB");
+        Assert.assertEquals(e.getValue(), "AConstant");
         found++;
       }
     }
     Assert.assertEquals(found, 2);
 
-    assert application.getExportGroups().size() == 1;
+    Assert.assertEquals(application.getExportGroups().size(), 1);
     List<ExportGroup> egs = application.getExportGroups();
     ExportGroup eg = egs.get(0);
-    assert eg.getName().equals("QuickLinks");
-    assert eg.getExports().size() == 2;
+    Assert.assertEquals(eg.getName(), "QuickLinks");
+    Assert.assertEquals(eg.getExports().size(), 2);
 
     found = 0;
     for (Export export : eg.getExports()) {
@@ -490,18 +607,18 @@
     String role_hm = "HBASE_MASTER";
     String role_hrs = "HBASE_REGIONSERVER";
 
-    AgentProviderService aps = new AgentProviderService();
-    AgentProviderService mockAps = Mockito.spy(aps);
+    AgentProviderService aps1 = new AgentProviderService();
+    AgentProviderService mockAps = Mockito.spy(aps1);
     doReturn(metainfo).when(mockAps).getMetainfo();
 
-    AgentProviderService mockAps2 = Mockito.spy(aps);
+    AgentProviderService mockAps2 = Mockito.spy(aps1);
     doReturn(metainfo2).when(mockAps2).getMetainfo();
 
     Assert.assertTrue(mockAps.isMaster(role_hm));
     Assert.assertFalse(mockAps.isMaster(role_hrs));
-    Assert.assertFalse(mockAps.canPublishConfig(role_hm));
+    Assert.assertTrue(mockAps.canPublishConfig(role_hm));
     Assert.assertFalse(mockAps.canPublishConfig(role_hrs));
-    Assert.assertFalse(mockAps.canAnyMasterPublishConfig());
+    Assert.assertTrue(mockAps.canAnyMasterPublishConfig());
 
     Assert.assertTrue(mockAps2.isMaster(role_hm));
     Assert.assertFalse(mockAps2.isMaster(role_hrs));
@@ -511,13 +628,13 @@
   }
 
   @Test
-  public void testOrchastratedAppStart() throws IOException {
+  public void testOrchestratedAppStart() throws IOException {
     // App has two components HBASE_MASTER and HBASE_REGIONSERVER
     // Start of HBASE_RS depends on the start of HBASE_MASTER
     InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
     Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
     ConfTree tree = new ConfTree();
-    tree.global.put(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
+    tree.global.put(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH, ".");
 
     AgentProviderService aps = new AgentProviderService();
     ContainerLaunchContext ctx = createNiceMock(ContainerLaunchContext.class);
@@ -552,6 +669,10 @@
     doReturn(access).when(mockAps).getAmState();
     doReturn(metainfo).when(mockAps).getApplicationMetainfo(any(SliderFileSystem.class), anyString());
 
+    Configuration conf = new Configuration();
+    conf.set(SliderXmlConfKeys.REGISTRY_PATH,
+        SliderXmlConfKeys.DEFAULT_REGISTRY_PATH);
+
     try {
       doReturn(true).when(mockAps).isMaster(anyString());
       doNothing().when(mockAps).addInstallCommand(
@@ -563,24 +684,23 @@
           anyString(),
           anyString(),
           any(HeartBeatResponse.class),
-          anyString());
+          anyString(),
+          Matchers.anyBoolean());
       doNothing().when(mockAps).addGetConfigCommand(
           anyString(),
           anyString(),
           any(HeartBeatResponse.class));
-      doNothing().when(mockAps).publishComponentConfiguration(
+      doNothing().when(mockAps).publishApplicationInstanceData(
           anyString(),
           anyString(),
           anyCollection());
-
+      doReturn(conf).when(mockAps).getConfig();
     } catch (SliderException e) {
     }
 
     expect(access.isApplicationLive()).andReturn(true).anyTimes();
     ClusterDescription desc = new ClusterDescription();
-    desc.setInfo(StatusKeys.INFO_AM_HOSTNAME, "host1");
-    desc.setInfo(StatusKeys.INFO_AM_AGENT_PORT, "8088");
-    desc.setInfo(StatusKeys.INFO_AM_SECURED_AGENT_PORT, "8089");
+    desc.setOption(OptionKeys.ZOOKEEPER_QUORUM, "host1:2181");
     desc.setInfo(OptionKeys.APPLICATION_NAME, "HBASE");
     expect(access.getClusterStatus()).andReturn(desc).anyTimes();
 
@@ -588,7 +708,9 @@
     ConfTreeOperations treeOps = aggConf.getAppConfOperations();
     treeOps.getOrAddComponent("HBASE_MASTER").put(AgentKeys.WAIT_HEARTBEAT, "0");
     treeOps.getOrAddComponent("HBASE_REGIONSERVER").put(AgentKeys.WAIT_HEARTBEAT, "0");
+    treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
     expect(access.getInstanceDefinitionSnapshot()).andReturn(aggConf).anyTimes();
+    expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
     replay(access, ctx, container, sliderFileSystem);
 
     // build two containers
@@ -665,7 +787,8 @@
       Mockito.verify(mockAps, Mockito.times(0)).addStartCommand(anyString(),
                                                                 anyString(),
                                                                 any(HeartBeatResponse.class),
-                                                                anyString());
+                                                                anyString(),
+                                                                Matchers.anyBoolean());
       // RS still does not start
       hb = new HeartBeat();
       hb.setResponseId(3);
@@ -675,7 +798,8 @@
       Mockito.verify(mockAps, Mockito.times(0)).addStartCommand(anyString(),
                                                                 anyString(),
                                                                 any(HeartBeatResponse.class),
-                                                                anyString());
+                                                                anyString(),
+                                                                Matchers.anyBoolean());
 
       // MASTER succeeds install and issues start
       hb = new HeartBeat();
@@ -685,7 +809,7 @@
       cr.setRole("HBASE_MASTER");
       cr.setRoleCommand("INSTALL");
       cr.setStatus("COMPLETED");
-      Map<String, String> ap = new HashMap<>();
+      Map<String, String> ap = new HashMap<String, String>();
       ap.put("a.port", "10233");
       cr.setAllocatedPorts(ap);
       hb.setReports(Arrays.asList(cr));
@@ -694,7 +818,8 @@
       Mockito.verify(mockAps, Mockito.times(1)).addStartCommand(anyString(),
                                                                 anyString(),
                                                                 any(HeartBeatResponse.class),
-                                                                anyString());
+                                                                anyString(),
+                                                                Matchers.anyBoolean());
       Map<String, String> allocatedPorts = mockAps.getAllocatedPorts();
       Assert.assertTrue(allocatedPorts != null);
       Assert.assertTrue(allocatedPorts.size() == 1);
@@ -709,7 +834,8 @@
       Mockito.verify(mockAps, Mockito.times(1)).addStartCommand(anyString(),
                                                                 anyString(),
                                                                 any(HeartBeatResponse.class),
-                                                                anyString());
+                                                                anyString(),
+                                                                Matchers.anyBoolean());
       // MASTER succeeds start
       hb = new HeartBeat();
       hb.setResponseId(3);
@@ -733,18 +859,110 @@
       Mockito.verify(mockAps, Mockito.times(2)).addStartCommand(anyString(),
                                                                 anyString(),
                                                                 any(HeartBeatResponse.class),
-                                                                anyString());
-    } catch (SliderException | IOException he) {
+                                                                anyString(),
+                                                                Matchers.anyBoolean());
+    // JDK7 
+    } catch (SliderException he) {
+      log.warn(he.getMessage());
+    } catch (IOException he) {
       log.warn(he.getMessage());
     }
 
-    Mockito.verify(mockAps, Mockito.times(1)).publishComponentConfiguration(
+    Mockito.verify(mockAps, Mockito.times(1)).publishApplicationInstanceData(
         anyString(),
         anyString(),
         anyCollection());
   }
 
   @Test
+  public void testNotifyContainerCompleted() {
+    AgentProviderService aps = new AgentProviderService();
+    AgentProviderService mockAps = Mockito.spy(aps);
+    doNothing().when(mockAps).publishApplicationInstanceData(anyString(), anyString(), anyCollection());
+
+    ContainerId cid = new MockContainerId(1);
+    String id = cid.toString();
+    ContainerId cid2 = new MockContainerId(2);
+    mockAps.getAllocatedPorts().put("a", "100");
+    mockAps.getAllocatedPorts(id).put("b", "101");
+    mockAps.getAllocatedPorts("cid2").put("c", "102");
+
+    mockAps.getComponentInstanceData().put("cid2", new HashMap<String, String>());
+    mockAps.getComponentInstanceData().put(id, new HashMap<String, String>());
+
+    mockAps.getComponentStatuses().put("cid2_HM", new ComponentInstanceState("HM", cid2, "aid"));
+    mockAps.getComponentStatuses().put(id + "_HM", new ComponentInstanceState("HM", cid, "aid"));
+
+    Assert.assertNotNull(mockAps.getComponentInstanceData().get(id));
+    Assert.assertNotNull(mockAps.getComponentInstanceData().get("cid2"));
+
+    Assert.assertNotNull(mockAps.getComponentStatuses().get(id + "_HM"));
+    Assert.assertNotNull(mockAps.getComponentStatuses().get("cid2_HM"));
+
+    Assert.assertEquals(mockAps.getAllocatedPorts().size(), 1);
+    Assert.assertEquals(mockAps.getAllocatedPorts(id).size(), 1);
+    Assert.assertEquals(mockAps.getAllocatedPorts("cid2").size(), 1);
+
+    // Make the call
+    mockAps.notifyContainerCompleted(new MockContainerId(1));
+
+    Assert.assertEquals(mockAps.getAllocatedPorts().size(), 1);
+    Assert.assertEquals(mockAps.getAllocatedPorts(id).size(), 0);
+    Assert.assertEquals(mockAps.getAllocatedPorts("cid2").size(), 1);
+
+    Assert.assertNull(mockAps.getComponentInstanceData().get(id));
+    Assert.assertNotNull(mockAps.getComponentInstanceData().get("cid2"));
+
+    Assert.assertNull(mockAps.getComponentStatuses().get(id + "_HM"));
+    Assert.assertNotNull(mockAps.getComponentStatuses().get("cid2_HM"));
+  }
+
+  @Test
+  public void testAddInstallCommand() throws Exception {
+    InputStream metainfo_1 = new ByteArrayInputStream(metainfo_1_str.getBytes());
+    Metainfo metainfo = new MetainfoParser().parse(metainfo_1);
+    AgentProviderService aps = new AgentProviderService();
+    HeartBeatResponse hbr = new HeartBeatResponse();
+
+    StateAccessForProviders access = createNiceMock(StateAccessForProviders.class);
+    AgentProviderService mockAps = Mockito.spy(aps);
+    doReturn(access).when(mockAps).getAmState();
+
+    AggregateConf aggConf = new AggregateConf();
+    ConfTreeOperations treeOps = aggConf.getAppConfOperations();
+    treeOps.getGlobalOptions().put(AgentKeys.JAVA_HOME, "java_home");
+    treeOps.set(OptionKeys.APPLICATION_NAME, "HBASE");
+    treeOps.set("site.fs.defaultFS", "hdfs://HOST1:8020/");
+    treeOps.set("internal.data.dir.path", "hdfs://HOST1:8020/database");
+    treeOps.set(OptionKeys.ZOOKEEPER_HOSTS, "HOST1");
+
+    expect(access.getAppConfSnapshot()).andReturn(treeOps).anyTimes();
+    expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
+    expect(access.isApplicationLive()).andReturn(true).anyTimes();
+
+    doReturn("HOST1").when(mockAps).getClusterInfoPropertyValue(anyString());
+    doReturn(metainfo).when(mockAps).getMetainfo();
+
+    Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+    Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+    ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
+    cn1.host = "HOST1";
+    container.put("cid1", cn1);
+    roleClusterNodeMap.put("HBASE_MASTER", container);
+    doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
+
+    replay(access);
+
+    mockAps.addInstallCommand("HBASE_MASTER", "cid1", hbr, "");
+    ExecutionCommand cmd = hbr.getExecutionCommands().get(0);
+    String pkgs = cmd.getHostLevelParams().get(AgentKeys.PACKAGE_LIST);
+    Assert.assertEquals("[{\"type\":\"tarball\",\"name\":\"files/hbase-0.96.1-hadoop2-bin.tar.gz\"}]", pkgs);
+    Assert.assertEquals("java_home", cmd.getHostLevelParams().get(AgentKeys.JAVA_HOME));
+    Assert.assertEquals("cid1", cmd.getHostLevelParams().get("container_id"));
+    Assert.assertEquals(Command.INSTALL.toString(), cmd.getRoleCommand());
+  }
+
+  @Test
   public void testAddStartCommand() throws Exception {
     AgentProviderService aps = new AgentProviderService();
     HeartBeatResponse hbr = new HeartBeatResponse();
@@ -763,6 +981,8 @@
     treeOps.set("config_types", "hbase-site");
     treeOps.getGlobalOptions().put("site.hbase-site.a.port", "${HBASE_MASTER.ALLOCATED_PORT}");
     treeOps.getGlobalOptions().put("site.hbase-site.b.port", "${HBASE_MASTER.ALLOCATED_PORT}");
+    treeOps.getGlobalOptions().put("site.hbase-site.random.port", "${HBASE_MASTER.ALLOCATED_PORT}{DO_NOT_PROPAGATE}");
+    treeOps.getGlobalOptions().put("site.hbase-site.random2.port", "${HBASE_MASTER.ALLOCATED_PORT}");
 
     expect(access.getAppConfSnapshot()).andReturn(treeOps).anyTimes();
     expect(access.getInternalsSnapshot()).andReturn(treeOps).anyTimes();
@@ -770,140 +990,32 @@
 
     doReturn("HOST1").when(mockAps).getClusterInfoPropertyValue(anyString());
 
-    Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<>();
-    Map<String, ClusterNode> container = new HashMap<>();
-    ClusterNode cn1 = new ClusterNode(new MyContainerId(1));
+    Map<String, Map<String, ClusterNode>> roleClusterNodeMap = new HashMap<String, Map<String, ClusterNode>>();
+    Map<String, ClusterNode> container = new HashMap<String, ClusterNode>();
+    ClusterNode cn1 = new ClusterNode(new MockContainerId(1));
     cn1.host = "HOST1";
     container.put("cid1", cn1);
     roleClusterNodeMap.put("HBASE_MASTER", container);
     doReturn(roleClusterNodeMap).when(mockAps).getRoleClusterNodeMapping();
-    Map<String, String> allocatedPorts = new HashMap<>();
-    allocatedPorts.put("a.port", "10023");
-    allocatedPorts.put("b.port", "10024");
+    Map<String, String> allocatedPorts = new HashMap<String, String>();
+    allocatedPorts.put("hbase-site.a.port", "10023");
+    allocatedPorts.put("hbase-site.b.port", "10024");
     doReturn(allocatedPorts).when(mockAps).getAllocatedPorts();
+    Map<String, String> allocatedPorts2 = new HashMap<String, String>();
+    allocatedPorts2.put("hbase-site.random.port", "10025");
+    doReturn(allocatedPorts2).when(mockAps).getAllocatedPorts(anyString());
 
     replay(access);
 
-    mockAps.addStartCommand("HBASE_MASTER", "cid1", hbr, "");
+    mockAps.addStartCommand("HBASE_MASTER", "cid1", hbr, "", Boolean.FALSE);
     Assert.assertTrue(hbr.getExecutionCommands().get(0).getConfigurations().containsKey("hbase-site"));
     Map<String, String> hbaseSiteConf = hbr.getExecutionCommands().get(0).getConfigurations().get("hbase-site");
     Assert.assertTrue(hbaseSiteConf.containsKey("a.port"));
-    Assert.assertTrue(hbaseSiteConf.get("a.port").equals("10023"));
-    Assert.assertTrue(hbaseSiteConf.get("b.port").equals("10024"));
+    Assert.assertEquals("10023", hbaseSiteConf.get("a.port"));
+    Assert.assertEquals("10024", hbaseSiteConf.get("b.port"));
+    Assert.assertEquals("10025", hbaseSiteConf.get("random.port"));
+    assertEquals("${HBASE_MASTER.ALLOCATED_PORT}",
+        hbaseSiteConf.get("random2.port"));
   }
 
-  private static class MyContainer extends Container {
-
-    ContainerId cid = null;
-
-    @Override
-    public ContainerId getId() {
-      return this.cid;
-    }
-
-    @Override
-    public void setId(ContainerId containerId) {
-      this.cid = containerId;
-    }
-
-    @Override
-    public NodeId getNodeId() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public void setNodeId(NodeId nodeId) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public String getNodeHttpAddress() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public void setNodeHttpAddress(String s) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public Resource getResource() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public void setResource(Resource resource) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public Priority getPriority() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public void setPriority(Priority priority) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public Token getContainerToken() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public void setContainerToken(Token token) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public int compareTo(Container o) {
-      return 0;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-  }
-
-  private static class MyContainerId extends ContainerId {
-    int id;
-
-    private MyContainerId(int id) {
-      this.id = id;
-    }
-
-    @Override
-    public ApplicationAttemptId getApplicationAttemptId() {
-      return null;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    protected void setApplicationAttemptId(ApplicationAttemptId applicationAttemptId) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public int getId() {
-      return id;  //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    protected void setId(int i) {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    protected void build() {
-      //To change body of implemented methods use File | Settings | File Templates.
-    }
-
-    @Override
-    public int hashCode() {
-      return this.id;
-    }
-
-    @Override
-    public String toString() {
-      return "MyContainerId{" +
-             "id=" + id +
-             '}';
-    }
-  }
 }
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
index 3ef1839..c123fbb 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentCommandOrder.java
@@ -19,6 +19,7 @@
 package org.apache.slider.providers.agent;
 
 import org.apache.slider.providers.agent.application.metadata.CommandOrder;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
 import org.junit.Assert;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -29,6 +30,7 @@
 public class TestComponentCommandOrder {
   protected static final Logger log =
       LoggerFactory.getLogger(TestComponentCommandOrder.class);
+  private final MockContainerId containerId = new MockContainerId(1);
 
   @Test
   public void testComponentCommandOrder() throws Exception {
@@ -43,11 +45,12 @@
     co3.setRequires("C-STARTED,D-STARTED,E-INSTALLED");
 
     ComponentCommandOrder cco = new ComponentCommandOrder(Arrays.asList(co1, co2, co3));
-    ComponentInstanceState cisB = new ComponentInstanceState("B", "cid", "aid");
-    ComponentInstanceState cisC = new ComponentInstanceState("C", "cid", "aid");
-    ComponentInstanceState cisD = new ComponentInstanceState("D", "cid", "aid");
-    ComponentInstanceState cisE = new ComponentInstanceState("E", "cid", "aid");
-    ComponentInstanceState cisE2 = new ComponentInstanceState("E", "cid", "aid");
+    ComponentInstanceState cisB = new ComponentInstanceState("B",
+        containerId, "aid");
+    ComponentInstanceState cisC = new ComponentInstanceState("C", containerId, "aid");
+    ComponentInstanceState cisD = new ComponentInstanceState("D", containerId, "aid");
+    ComponentInstanceState cisE = new ComponentInstanceState("E", containerId, "aid");
+    ComponentInstanceState cisE2 = new ComponentInstanceState("E", containerId, "aid");
     cisB.setState(State.STARTED);
     cisC.setState(State.INSTALLED);
     Assert.assertTrue(cco.canExecute("A", Command.START, Arrays.asList(cisB)));
@@ -92,8 +95,8 @@
     co.setCommand(" A-START");
     co.setRequires("B-STARTED , C-STARTED");
 
-    ComponentInstanceState cisB = new ComponentInstanceState("B", "cid", "aid");
-    ComponentInstanceState cisC = new ComponentInstanceState("C", "cid", "aid");
+    ComponentInstanceState cisB = new ComponentInstanceState("B", containerId, "aid");
+    ComponentInstanceState cisC = new ComponentInstanceState("C", containerId, "aid");
     cisB.setState(State.STARTED);
     cisC.setState(State.STARTED);
 
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
index be9f178..a723394 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestComponentInstanceState.java
@@ -19,6 +19,7 @@
 package org.apache.slider.providers.agent;
 
 import junit.framework.TestCase;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -29,6 +30,7 @@
   private State[] states = new State[]{
       State.INIT, State.INSTALLING, State.INSTALLED,
       State.STARTING, State.STARTED, State.INSTALL_FAILED};
+  private final MockContainerId containerId = new MockContainerId(1);
 
   @Test
   public void testValidateSupportedCommands() {
@@ -42,7 +44,7 @@
   }
 
   @Test
-  public void testGetNextStateBasedOnResult() {
+  public void testGetNextStateBasedOnResult() throws Exception {
     TestCase.assertEquals(State.INSTALLING, State.INSTALLING.getNextState(CommandResult.IN_PROGRESS));
     TestCase.assertEquals(State.STARTING, State.STARTING.getNextState(CommandResult.IN_PROGRESS));
     expectExceptionOnGetNextForResult(IllegalArgumentException.class, State.INIT, CommandResult.IN_PROGRESS);
@@ -66,98 +68,109 @@
   }
 
   @Test
-  public void testGetNextStateBasedOnCommand() {
-    for (int index = 0; index < states.length; index++) {
-      TestCase.assertEquals(states[index], states[index].getNextState(Command.NOP));
+  public void testGetNextStateBasedOnCommand() throws Exception {
+    for (State state : states) {
+      TestCase.assertEquals(state, state.getNextState(Command.NOP));
     }
 
     TestCase.assertEquals(State.INSTALLING, State.INIT.getNextState(Command.INSTALL));
     TestCase.assertEquals(State.INSTALLING, State.INSTALL_FAILED.getNextState(Command.INSTALL));
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALLED, Command.INSTALL);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTING, Command.INSTALL);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTED, Command.INSTALL);
+    expectIllegalArgumentException(State.INSTALLED, Command.INSTALL);
+    expectIllegalArgumentException(State.STARTING, Command.INSTALL);
+    expectIllegalArgumentException(State.STARTED, Command.INSTALL);
 
     TestCase.assertEquals(State.STARTING, State.INSTALLED.getNextState(Command.START));
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INIT, Command.START);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALL_FAILED, Command.START);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTING, Command.START);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.INSTALLING, Command.START);
-    expectExceptionOnGetNextForCommand(IllegalArgumentException.class, State.STARTED, Command.START);
+    expectIllegalArgumentException(State.INIT, Command.START);
+    expectIllegalArgumentException(State.INSTALL_FAILED, Command.START);
+    expectIllegalArgumentException(State.STARTING, Command.START);
+    expectIllegalArgumentException(State.INSTALLING, Command.START);
+    expectIllegalArgumentException(State.STARTED, Command.START);
+  }
+
+  protected void expectIllegalArgumentException(State state, Command command) throws
+      Exception {
+    expectExceptionOnGetNextForCommand(IllegalArgumentException.class,
+        state, command);
   }
 
   @Test
   public void validateStateTransitionNormal() {
-    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+    assertInState(State.INIT, componentInstanceState);
     TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
     TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    assertInState(State.INIT, componentInstanceState);
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.IN_PROGRESS, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
     TestCase.assertEquals(Command.START, componentInstanceState.getNextCommand());
     componentInstanceState.commandIssued(Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.IN_PROGRESS, Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.START);
-    TestCase.assertEquals(State.STARTED, componentInstanceState.getState());
+    assertInState(State.STARTED, componentInstanceState);
+  }
+
+  protected void assertInState(State state,
+      ComponentInstanceState componentInstanceState) {
+    TestCase.assertEquals(state, componentInstanceState.getState());
   }
 
   @Test
   public void validateStateTransitionScenario2() {
-    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+    assertInState(State.INIT, componentInstanceState);
     TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
     TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    assertInState(State.INIT, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
     TestCase.assertEquals(Command.START, componentInstanceState.getNextCommand());
 
     componentInstanceState.commandIssued(Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.START);
     componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.START);
-    TestCase.assertEquals(State.STARTED, componentInstanceState.getState());
+    assertInState(State.STARTED, componentInstanceState);
   }
 
   @Test
   public void tolerateMaxFailures() {
-    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+    assertInState(State.INIT, componentInstanceState);
     TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
     TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    assertInState(State.INIT, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     try {
       componentInstanceState.commandIssued(Command.INSTALL);
@@ -168,41 +181,41 @@
 
   @Test
   public void tolerateFewFailureThenReset() {
-    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
+    assertInState(State.INIT, componentInstanceState);
     TestCase.assertEquals(true, componentInstanceState.hasPendingCommand());
     TestCase.assertEquals(Command.INSTALL, componentInstanceState.getNextCommand());
-    TestCase.assertEquals(State.INIT, componentInstanceState.getState());
+    assertInState(State.INIT, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALL_FAILED, componentInstanceState.getState());
+    assertInState(State.INSTALL_FAILED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLING, componentInstanceState.getState());
+    assertInState(State.INSTALLING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.COMPLETED, Command.INSTALL);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
 
     componentInstanceState.commandIssued(Command.START);
-    TestCase.assertEquals(State.STARTING, componentInstanceState.getState());
+    assertInState(State.STARTING, componentInstanceState);
     componentInstanceState.applyCommandResult(CommandResult.FAILED, Command.START);
-    TestCase.assertEquals(State.INSTALLED, componentInstanceState.getState());
+    assertInState(State.INSTALLED, componentInstanceState);
 
     try {
       componentInstanceState.commandIssued(Command.START);
@@ -213,7 +226,7 @@
 
   @Test
   public void testBadTransitions() {
-    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", "CID_001", "AID_001");
+    ComponentInstanceState componentInstanceState = new ComponentInstanceState("HBASE_MASTER", containerId, "AID_001");
 
     try {
       componentInstanceState.commandIssued(Command.START);
@@ -262,25 +275,25 @@
   }
 
   private <T extends Throwable> void expectExceptionOnGetNextForResult(
-      Class<T> expected, State state, CommandResult result) {
+      Class<T> expected, State state, CommandResult result) throws Exception {
     try {
       state.getNextState(result);
       TestCase.fail("Must fail");
     } catch (Exception e) {
       if (!expected.isInstance(e)) {
-        TestCase.fail("Unexpected exception " + e.getClass());
+        throw e;
       }
     }
   }
 
   private <T extends Throwable> void expectExceptionOnGetNextForCommand(
-      Class<T> expected, State state, Command command) {
+      Class<T> expected, State state, Command command) throws Exception {
     try {
       state.getNextState(command);
       TestCase.fail("Must fail");
     } catch (Exception e) {
       if (!expected.isInstance(e)) {
-        TestCase.fail("Unexpected exception " + e.getClass());
+        throw e;
       }
     }
   }
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
index c2cfafd..7314b72 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
@@ -16,6 +16,8 @@
  */
 package org.apache.slider.providers.agent;
 
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.slider.server.appmaster.model.mock.MockContainerId;
 import org.junit.Assert;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -54,10 +56,12 @@
     AgentProviderService provider = createNiceMock(AgentProviderService.class);
     HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 500);
     Assert.assertFalse(hbm.isAlive());
-    Map<String, ComponentInstanceState> statuses = new HashMap<>();
-    ComponentInstanceState state = new ComponentInstanceState("HBASE_MASTER", "Cid", "Aid");
+    Map<String, ComponentInstanceState> statuses = new HashMap<String, ComponentInstanceState>();
+    ContainerId container1 = new MockContainerId(1);
+    ComponentInstanceState state = new ComponentInstanceState("HBASE_MASTER",
+        container1, "Aid");
     state.setState(State.STARTED);
-    state.setLastHeartbeat(System.currentTimeMillis());
+    state.heartbeat(System.currentTimeMillis());
     statuses.put("label_1", state);
     expect(provider.getComponentStatuses()).andReturn(statuses).anyTimes();
     replay(provider);
@@ -72,65 +76,95 @@
   @Test
   public void testHeartbeatMonitorWithUnhealthyAndThenLost() throws Exception {
     AgentProviderService provider = createNiceMock(AgentProviderService.class);
-    HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 2 * 1000);
-    Assert.assertFalse(hbm.isAlive());
-    Map<String, ComponentInstanceState> statuses = new HashMap<>();
-    ComponentInstanceState masterState = new ComponentInstanceState("HBASE_MASTER", "Cid1", "Aid1");
+    long now = 100000;
+    int wakeupInterval = 2 * 1000;
+
+    Map<String, ComponentInstanceState> statuses = new HashMap<String, ComponentInstanceState>();
+    ContainerId masterContainer = new MockContainerId(1); 
+    ContainerId slaveContainer = new MockContainerId(2); 
+    ComponentInstanceState masterState = new ComponentInstanceState("HBASE_MASTER",
+        masterContainer, "Aid1");
+    String masterLabel = "Aid1_Cid1_HBASE_MASTER";
+    statuses.put(masterLabel, masterState);
+
+    ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER",
+        slaveContainer, "Aid1");
+    String slaveLabel = "Aid1_Cid2_HBASE_REGIONSERVER";
+    statuses.put(slaveLabel, slaveState);
+
     masterState.setState(State.STARTED);
-    masterState.setLastHeartbeat(System.currentTimeMillis());
-    statuses.put("Aid1_Cid1_HBASE_MASTER", masterState);
-
-    ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER", "Cid2", "Aid1");
+    masterState.heartbeat(now);
     slaveState.setState(State.STARTED);
-    slaveState.setLastHeartbeat(System.currentTimeMillis());
-    statuses.put("Aid1_Cid2_HBASE_REGIONSERVER", slaveState);
-
+    slaveState.heartbeat(now);
     expect(provider.getComponentStatuses()).andReturn(statuses).anyTimes();
-    expect(provider.releaseContainer("Aid1_Cid2_HBASE_REGIONSERVER")).andReturn(true).once();
     replay(provider);
-    hbm.start();
 
-    Thread.sleep(1 * 1000);
+
+    HeartbeatMonitor heartbeatMonitor = new HeartbeatMonitor(provider,
+        wakeupInterval);
+    Assert.assertFalse(heartbeatMonitor.isAlive());
+    now += wakeupInterval;
+    masterState.setState(State.STARTED);
+    masterState.heartbeat(now);
+    
+    slaveState.setState(State.STARTED);
     // just dial back by at least 2 sec but no more than 4
-    slaveState.setLastHeartbeat(System.currentTimeMillis() - (2 * 1000 + 100));
-    masterState.setLastHeartbeat(System.currentTimeMillis());
+    slaveState.heartbeat(now - (wakeupInterval + 100));
 
-    Thread.sleep(1 * 1000 + 500);
-    masterState.setLastHeartbeat(System.currentTimeMillis());
 
-    log.info("Slave container state {}", slaveState.getContainerState());
-    Assert.assertEquals(ContainerState.HEALTHY, masterState.getContainerState());
-    Assert.assertEquals(ContainerState.UNHEALTHY, slaveState.getContainerState());
+    assertInState(ContainerState.HEALTHY, masterState, now);
+    assertInState(ContainerState.HEALTHY, slaveState, now);
+    
+    //tick #1
+    heartbeatMonitor.doWork(now);
 
-    Thread.sleep(1 * 1000);
-    // some lost heartbeats are ignored (e.g. ~ 1 sec)
-    masterState.setLastHeartbeat(System.currentTimeMillis() - 1 * 1000);
+    assertInState(ContainerState.HEALTHY, masterState, now);
+    assertInState(ContainerState.UNHEALTHY, slaveState, now);
 
-    Thread.sleep(1 * 1000 + 500);
+    // heartbeat from the master
+    masterState.heartbeat(now + 1500);
 
-    log.info("Slave container state {}", slaveState.getContainerState());
-    Assert.assertEquals(ContainerState.HEALTHY, masterState.getContainerState());
-    Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
-    hbm.shutdown();
+    // tick #2
+    now += wakeupInterval;
+    heartbeatMonitor.doWork(now);
+
+    assertInState(ContainerState.HEALTHY, masterState, now);
+    assertInState(ContainerState.HEARTBEAT_LOST, slaveState, now);
+  }
+
+  protected void assertInState(ContainerState expectedState,
+      ComponentInstanceState componentInstanceState, long now) {
+    ContainerState actualState = componentInstanceState.getContainerState();
+    if (!expectedState.equals(actualState)) {
+      // mismatch
+      Assert.fail(String.format("at [%06d] Expected component state %s " +
+                                "but found state %s in component %s",
+          now, expectedState, actualState, componentInstanceState));
+    }
   }
 
   @Test
   public void testHeartbeatTransitions() {
-    ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER", "Cid2", "Aid1");
+    ContainerId container2 = new MockContainerId(2);
+    ComponentInstanceState slaveState = new ComponentInstanceState("HBASE_REGIONSERVER",
+        container2, "Aid1");
     slaveState.setState(State.STARTED);
 
-    Assert.assertEquals(ContainerState.INIT, slaveState.getContainerState());
-    slaveState.setLastHeartbeat(System.currentTimeMillis());
-    Assert.assertEquals(ContainerState.HEALTHY, slaveState.getContainerState());
+    long lastHeartbeat = System.currentTimeMillis();
+    assertInState(ContainerState.INIT, slaveState, 0);
+    slaveState.heartbeat(lastHeartbeat);
+    assertInState(ContainerState.HEALTHY, slaveState, lastHeartbeat);
 
     slaveState.setContainerState(ContainerState.UNHEALTHY);
-    Assert.assertEquals(ContainerState.UNHEALTHY, slaveState.getContainerState());
-    slaveState.setLastHeartbeat(System.currentTimeMillis());
-    Assert.assertEquals(ContainerState.HEALTHY, slaveState.getContainerState());
+    lastHeartbeat = System.currentTimeMillis();
+    assertInState(ContainerState.UNHEALTHY, slaveState, lastHeartbeat);
+    slaveState.heartbeat(lastHeartbeat);
+    assertInState(ContainerState.HEALTHY, slaveState, lastHeartbeat);
 
     slaveState.setContainerState(ContainerState.HEARTBEAT_LOST);
-    Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
-    slaveState.setLastHeartbeat(System.currentTimeMillis());
-    Assert.assertEquals(ContainerState.HEARTBEAT_LOST, slaveState.getContainerState());
+    assertInState(ContainerState.HEARTBEAT_LOST, slaveState, lastHeartbeat);
+    lastHeartbeat = System.currentTimeMillis();
+    slaveState.heartbeat(lastHeartbeat);
+    assertInState(ContainerState.HEARTBEAT_LOST, slaveState, lastHeartbeat);
   }
 }
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java b/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
similarity index 87%
rename from slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java
rename to slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
index 61c53df..98f0afb 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/MetainfoParserTest.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/application/metadata/TestMetainfoParser.java
@@ -27,9 +27,9 @@
 /**
  *
  */
-public class MetainfoParserTest {
+public class TestMetainfoParser {
   protected static final Logger log =
-      LoggerFactory.getLogger(MetainfoParserTest.class);
+      LoggerFactory.getLogger(TestMetainfoParser.class);
   public static final String METAINFO_XML =
       "/org/apache/slider/providers/agent/application/metadata/metainfo.xml";
 
@@ -52,6 +52,10 @@
     for (Component comp : application.getComponents()) {
       if (comp != null && comp.getName().equals("NIMBUS")) {
         found = true;
+        Assert.assertEquals(0, comp.getComponentExports().size());
+      }
+      if (comp != null && comp.getName().equals("SUPERVISOR")) {
+        Assert.assertEquals(1, comp.getComponentExports().size());
       }
     }
     assert found;
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
index 17fbe2b..7e2ab3c 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/agent/TestAMAgentWebServices.java
@@ -24,7 +24,6 @@
 import com.sun.jersey.api.client.config.ClientConfig;
 import com.sun.jersey.api.client.config.DefaultClientConfig;
 import com.sun.jersey.api.json.JSONConfiguration;
-import com.sun.jersey.test.framework.JerseyTest;
 import junit.framework.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -38,12 +37,12 @@
 import org.apache.slider.server.appmaster.model.mock.MockSliderClusterProtocol;
 import org.apache.slider.server.appmaster.state.AppState;
 import org.apache.slider.server.appmaster.state.ProviderAppState;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
 import org.apache.slider.server.appmaster.web.WebAppApi;
 import org.apache.slider.server.appmaster.web.WebAppApiImpl;
 import org.apache.slider.server.appmaster.web.rest.RestPaths;
 import org.apache.slider.server.services.security.CertificateManager;
 import org.apache.slider.server.services.security.SecurityUtils;
-import org.apache.slider.test.SliderTestBase;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -125,11 +124,12 @@
       appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES);
       appState.buildInstance(
           factory.newInstanceDefinition(0, 0, 0),
+          new Configuration(),
           new Configuration(false),
           factory.ROLES,
           fs,
           historyPath,
-          null, null);
+          null, null, new SimpleReleaseSelector());
     } catch (Exception e) {
       log.error("Failed to set up app {}", e);
     }
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
index 134f0bc..91aa2b0 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/management/TestAMManagementWebServices.java
@@ -45,6 +45,7 @@
 import org.apache.slider.server.appmaster.model.mock.MockSliderClusterProtocol;
 import org.apache.slider.server.appmaster.state.AppState;
 import org.apache.slider.server.appmaster.state.ProviderAppState;
+import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
 import org.apache.slider.server.appmaster.web.WebAppApi;
 import org.apache.slider.server.appmaster.web.WebAppApiImpl;
 import org.apache.slider.server.appmaster.web.rest.AMWebServices;
@@ -170,12 +171,20 @@
           appState.setContainerLimits(RM_MAX_RAM, RM_MAX_CORES);
           appState.buildInstance(
               factory.newInstanceDefinition(0, 0, 0),
+              new Configuration(),
               new Configuration(false),
               factory.ROLES,
               fs,
               historyPath,
-              null, null);
-        } catch (IOException | BadClusterStateException | URISyntaxException | BadConfigException e) {
+              null, null, new SimpleReleaseSelector());
+// JDK7        } catch (IOException | BadClusterStateException | URISyntaxException | BadConfigException e) {
+        } catch (IOException e) {
+          log.error("{}", e, e);
+        } catch (BadClusterStateException e) {
+          log.error("{}", e, e);
+        } catch (URISyntaxException e) {
+          log.error("{}", e, e);
+        } catch (BadConfigException e) {
           log.error("{}", e, e);
         }
         ProviderAppState providerAppState = new ProviderAppState("undefined",
diff --git a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
index 2427009..eb368e3 100644
--- a/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
+++ b/slider-core/src/test/java/org/apache/slider/server/appmaster/web/rest/publisher/TestAgentProviderService.java
@@ -16,20 +16,22 @@
  */
 package org.apache.slider.server.appmaster.web.rest.publisher;
 
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.slider.providers.agent.AgentProviderService;
-import org.apache.slider.server.appmaster.AMViewForProviders;
+import org.apache.slider.server.appmaster.actions.QueueAccess;
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.services.registry.RegistryViewForProviders;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
  *
  */
-public class TestAgentProviderService extends AgentProviderService{
+public class TestAgentProviderService extends AgentProviderService {
   protected static final Logger log =
       LoggerFactory.getLogger(TestAgentProviderService.class);
 
@@ -40,14 +42,16 @@
 
   @Override
   public void bind(StateAccessForProviders stateAccessor,
-                   RegistryViewForProviders reg, AMViewForProviders amView) {
-    super.bind(stateAccessor, reg, amView);
-    Map<String,String> dummyProps = new HashMap<>();
+      RegistryViewForProviders reg,
+      QueueAccess queueAccess,
+      List<Container> liveContainers) {
+    super.bind(stateAccessor, reg, queueAccess, liveContainers);
+    Map<String,String> dummyProps = new HashMap<String, String>();
     dummyProps.put("prop1", "val1");
     dummyProps.put("prop2", "val2");
     log.info("publishing dummy-site.xml with values {}", dummyProps);
-    publishComponentConfiguration("dummy-site", "dummy configuration",
-                                  dummyProps.entrySet());
+    publishApplicationInstanceData("dummy-site", "dummy configuration",
+                                   dummyProps.entrySet());
 
   }
 
diff --git a/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java b/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
index e77eeb3..45fdc86 100644
--- a/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
+++ b/slider-core/src/test/java/org/apache/slider/server/services/workflow/ProcessCommandFactory.java
@@ -37,7 +37,7 @@
    * @return commands
    */
   public List<String> ls(File dir) {
-    List<String> commands = new ArrayList<>(5);
+    List<String> commands = new ArrayList<String>(5);
     commands.add("ls");
     commands.add("-1");
     commands.add(dir.getAbsolutePath());
@@ -50,7 +50,7 @@
    * @return commands
    */
   public List<String> echo(String text) {
-    List<String> commands = new ArrayList<>(5);
+    List<String> commands = new ArrayList<String>(5);
     commands.add("echo");
     commands.add(text);
     return commands;
@@ -72,7 +72,7 @@
    * @return commands
    */
   public List<String> exitFalse() {
-    List<String> commands = new ArrayList<>(2);
+    List<String> commands = new ArrayList<String>(2);
     commands.add("false");
     return commands;
   }
diff --git a/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java b/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
index 9514f47..dc160d9 100644
--- a/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
+++ b/slider-core/src/test/java/org/apache/slider/server/services/workflow/TestWorkflowExecutorService.java
@@ -20,10 +20,14 @@
 
 import org.junit.Test;
 
+import java.util.concurrent.ExecutorService;
 
+
+/**
+ * Basic tests for executor service
+ */
 public class TestWorkflowExecutorService extends WorkflowServiceTestBase {
 
-
   @Test
   public void testAsyncRun() throws Throwable {
 
@@ -51,7 +55,8 @@
     assertNotNull(runnable.getException());
   }
 
-  private static class ExecutorSvc extends AbstractWorkflowExecutorService {
+  private static class ExecutorSvc
+      extends WorkflowExecutorService<ExecutorService> {
     private ExecutorSvc() {
       super("ExecutorService",
           ServiceThreadFactory.singleThreadExecutor("test", true));
diff --git a/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java b/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
index cc2cc9b..d739324 100644
--- a/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
+++ b/slider-core/src/test/java/org/apache/slider/test/MiniZooKeeperCluster.java
@@ -75,9 +75,9 @@
     this.started = false;
     this.configuration = configuration;
     activeZKServerIndex = -1;
-    zooKeeperServers = new ArrayList<>();
-    clientPortList = new ArrayList<>();
-    standaloneServerFactoryList = new ArrayList<>();
+    zooKeeperServers = new ArrayList<ZooKeeperServer>();
+    clientPortList = new ArrayList<Integer>();
+    standaloneServerFactoryList = new ArrayList<NIOServerCnxnFactory>();
   }
 
   public void setDefaultClientPort(int clientPort) {
diff --git a/slider-core/src/test/python/agent/main.py b/slider-core/src/test/python/agent/main.py
index 8b7044e..e50642d 100755
--- a/slider-core/src/test/python/agent/main.py
+++ b/slider-core/src/test/python/agent/main.py
@@ -34,9 +34,8 @@
   parser.add_option("--config", dest="conf_folder", help="conf folder")
   parser.add_option('--command', dest='command', help='command to execute')
   parser.add_option('--label', dest='label', help='label')
-  parser.add_option('--host', dest='host', help='port')
-  parser.add_option('--port', dest='port', help='host')
-  parser.add_option('--secured_port', dest='secured_port', help='host')
+  parser.add_option('--zk-quorum', dest='host:2181', help='zookeeper quorum')
+  parser.add_option('--zk-reg-path', dest='/register/org-apache-slider/cl1', help='zookeeper registry path')
 
   (options, args) = parser.parse_args()
 
diff --git a/slider-core/src/test/resources/log4j.properties b/slider-core/src/test/resources/log4j.properties
index a552a55..c1a524d 100644
--- a/slider-core/src/test/resources/log4j.properties
+++ b/slider-core/src/test/resources/log4j.properties
@@ -42,7 +42,7 @@
 log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
 log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
 log4j.logger.org.apache.hadoop.hdfs=WARN
-
+log4j.logger.BlockStateChange=WARN
 
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN
diff --git a/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml b/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
index 3d24f96..d9004ad 100644
--- a/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
+++ b/slider-core/src/test/resources/org/apache/slider/providers/agent/application/metadata/metainfo.xml
@@ -22,11 +22,67 @@
     <name>STORM</name>
     <comment>Apache Hadoop Stream processing framework</comment>
     <version>0.9.1.2.1</version>
+    <exportedConfigs>storm-site</exportedConfigs>
+
+    <exportGroups>
+      <exportGroup>
+        <name>QuickLinks</name>
+        <exports>
+          <export>
+            <name>app.jmx</name>
+            <value>http://${STORM_REST_API_HOST}:${site.global.rest_api_port}/api/cluster/summary</value>
+          </export>
+          <export>
+            <name>app.monitor</name>
+            <value>http://${STORM_UI_SERVER_HOST}:${site.storm-site.ui.port}</value>
+          </export>
+          <export>
+            <name>app.metrics</name>
+            <value>http://${site.global.ganglia_server_host}/cgi-bin/rrd.py?c=${site.global.ganglia_server_id}</value>
+          </export>
+          <export>
+            <name>ganglia.ui</name>
+            <value>http://${site.global.ganglia_server_host}/ganglia?c=${site.global.ganglia_server_id}</value>
+          </export>
+          <export>
+            <name>nimbus.url</name>
+            <value>http://${NIMBUS_HOST}:${site.storm-site.nimbus.thrift.port}</value>
+          </export>
+        </exports>
+      </exportGroup>
+    </exportGroups>
+
+    <commandOrders>
+      <commandOrder>
+        <command>NIMBUS-START</command>
+        <requires>SUPERVISOR-INSTALLED,STORM_UI_SERVER-INSTALLED,DRPC_SERVER-INSTALLED,STORM_REST_API-INSTALLED
+        </requires>
+      </commandOrder>
+      <commandOrder>
+        <command>SUPERVISOR-START</command>
+        <requires>NIMBUS-STARTED</requires>
+      </commandOrder>
+      <commandOrder>
+        <command>DRPC_SERVER-START</command>
+        <requires>NIMBUS-STARTED</requires>
+      </commandOrder>
+      <commandOrder>
+        <command>STORM_REST_API-START</command>
+        <requires>NIMBUS-STARTED,DRPC_SERVER-STARTED,STORM_UI_SERVER-STARTED</requires>
+      </commandOrder>
+      <commandOrder>
+        <command>STORM_UI_SERVER-START</command>
+        <requires>NIMBUS-STARTED</requires>
+      </commandOrder>
+    </commandOrders>
+
     <components>
 
       <component>
         <name>NIMBUS</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <appExports>QuickLinks-nimbus.url,QuickLinks-ganglia.ui,QuickLinks-app.metrics</appExports>
         <commandScript>
           <script>scripts/nimbus.py</script>
           <scriptType>PYTHON</scriptType>
@@ -37,6 +93,8 @@
       <component>
         <name>STORM_REST_API</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <appExports>QuickLinks-app.jmx</appExports>
         <commandScript>
           <script>scripts/rest_api.py</script>
           <scriptType>PYTHON</scriptType>
@@ -47,6 +105,13 @@
       <component>
         <name>SUPERVISOR</name>
         <category>SLAVE</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
+        <componentExports>
+          <componentExport>
+            <name>log_viewer_port</name>
+            <value>${THIS_HOST}:${site.storm-site.logviewer.port}</value>
+          </componentExport>
+        </componentExports>
         <commandScript>
           <script>scripts/supervisor.py</script>
           <scriptType>PYTHON</scriptType>
@@ -57,6 +122,9 @@
       <component>
         <name>STORM_UI_SERVER</name>
         <category>MASTER</category>
+        <publishConfig>true</publishConfig>
+        <appExports>QuickLinks-app.monitor</appExports>
+        <autoStartOnFailure>true</autoStartOnFailure>
         <commandScript>
           <script>scripts/ui_server.py</script>
           <scriptType>PYTHON</scriptType>
@@ -67,6 +135,7 @@
       <component>
         <name>DRPC_SERVER</name>
         <category>MASTER</category>
+        <autoStartOnFailure>true</autoStartOnFailure>
         <commandScript>
           <script>scripts/drpc_server.py</script>
           <scriptType>PYTHON</scriptType>
@@ -86,10 +155,5 @@
         </packages>
       </osSpecific>
     </osSpecifics>
-
-    <configuration-dependencies>
-      <config-type>storm-site</config-type>
-      <config-type>global</config-type>
-    </configuration-dependencies>
   </application>
 </metainfo>
diff --git a/slider-funtest/pom.xml b/slider-funtest/pom.xml
index cb16669..39d28e0 100644
--- a/slider-funtest/pom.xml
+++ b/slider-funtest/pom.xml
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
   </parent>
   <properties>
     <work.dir>package-tmp</work.dir>
@@ -67,14 +67,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
@@ -93,8 +85,17 @@
       <!-- test -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <version>${maven-surefire-plugin.version}</version>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>${maven-failsafe-plugin.version}</version>
+        <executions>
+          <execution>
+            <id>run-integration-tests</id>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
           <!--mvn process fork options-->
           <reuseForks>${test.reuseForks}</reuseForks>
@@ -121,12 +122,6 @@
             <test.app.resource>../slider-core/src/test/app_packages/test_command_log/resources.json</test.app.resource>
             <test.app.template>../slider-core/src/test/app_packages/test_command_log/appConfig.json</test.app.template>
           </systemPropertyVariables>
-          <includes>
-            <include>**/Test*.java</include>
-          </includes>
-          <excludes>
-            <exclude>**/Test*$*.java</exclude>
-          </excludes>
         </configuration>
       </plugin>
   
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
index c42589a..e0b87f7 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/abstracttests/AbstractTestBuildSetup.groovy
@@ -143,7 +143,6 @@
   @Test
   public void testConfHasDefaultFS() throws Throwable {
     Configuration conf = loadSliderConf()
-    assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
     String fs = conf.get("fs.defaultFS")
     log.info("Test Filesystem $fs")
     assert fs != null
@@ -152,9 +151,7 @@
 
   @Test
   public void testConfHasRM() throws Throwable {
-
     Configuration conf = loadSliderConf()
-    assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
     String val = conf.get(YarnConfiguration.RM_ADDRESS)
     log.info("$YarnConfiguration.RM_ADDRESS = $val")
     assert val != YarnConfiguration.DEFAULT_RM_ADDRESS
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
index 26ae2bb..0a0ac16 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
@@ -23,10 +23,6 @@
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
-import org.apache.slider.funtest.framework.AgentUploads
-import org.apache.slider.funtest.framework.CommandTestBase
-import org.apache.slider.funtest.framework.FuntestProperties
-import org.apache.slider.funtest.framework.SliderShell
 import org.apache.tools.zip.ZipEntry
 import org.apache.tools.zip.ZipOutputStream
 import org.junit.Before
@@ -65,7 +61,6 @@
   public TemporaryFolder folder = new TemporaryFolder();
 
   public static void assumeAgentTestsEnabled() {
-    assumeFunctionalTestsEnabled()
     assume(AGENTTESTS_ENABLED, "Agent tests disabled")
   }
 
@@ -158,31 +153,6 @@
     return null;
   }
 
-  public static boolean isApplicationInState(String text, String applicationName) {
-    boolean exists = false
-    SliderShell shell = slider(EXIT_SUCCESS,
-        [
-            ACTION_LIST,
-            applicationName])
-    for (String str in shell.out) {
-      if (str.contains(text)) {
-        exists = true
-      }
-    }
-
-    return exists
-  }
-
-  protected void ensureApplicationIsUp(String clusterName) {
-    repeatUntilTrue(this.&isApplicationUp, 15, 1000 * 3, ['arg1': clusterName],
-        true, 'Application did not start, aborting test.')
-  }
-
-  boolean isApplicationUp(Map<String, String> args) {
-    String applicationName = args['arg1'];
-    return isApplicationInState("RUNNING", applicationName);
-  }
-
   public static void addDir(File dirObj, ZipOutputStream zipFile, String prefix) {
     dirObj.eachFile() { file ->
       if (file.directory) {
@@ -196,23 +166,6 @@
     }
   }
 
-  protected void repeatUntilTrue(Closure c, int maxAttempts, int sleepDur, Map args,
-                                 boolean failIfUnsuccessful = false, String message = "") {
-    int attemptCount = 0
-    while (attemptCount < maxAttempts) {
-      if (c(args)) {
-        break
-      };
-      attemptCount++;
-
-      if (failIfUnsuccessful) {
-        assert attemptCount != maxAttempts, message
-      }
-
-      sleep(sleepDur)
-    }
-  }
-
   protected void cleanup(String applicationName) throws Throwable {
     if (setup_failed) {
       // cleanup probably won't work if setup failed
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 08d352a..278bd2b 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -66,8 +66,6 @@
 
   public static final int SLIDER_TEST_TIMEOUT
 
-  public static final boolean FUNTESTS_ENABLED
-
   public static final String YARN_RAM_REQUEST
   
 
@@ -83,8 +81,6 @@
     SLIDER_TEST_TIMEOUT = getTimeOptionMillis(SLIDER_CONFIG,
         KEY_TEST_TIMEOUT,
         1000 * DEFAULT_TEST_TIMEOUT_SECONDS)
-    FUNTESTS_ENABLED =
-        SLIDER_CONFIG.getBoolean(KEY_SLIDER_FUNTESTS_ENABLED, true)
 
     YARN_RAM_REQUEST = SLIDER_CONFIG.get(
         KEY_TEST_YARN_RAM_REQUEST,
@@ -215,6 +211,20 @@
     ])
   }
 
+
+  static SliderShell freeze(
+      int exitCode,
+      String name,
+      Collection<String> args) {
+    slider(exitCode, [ACTION_FREEZE, name] + args)
+  }
+
+  /**
+   * Freeze cluster: no exit code checking
+   * @param name
+   * @param args
+   * @return
+   */
   static SliderShell freeze(String name, Collection<String> args) {
     slider([ACTION_FREEZE, name] + args)
   }
@@ -292,7 +302,7 @@
   }
 
   static SliderShell thaw(String name, Collection<String> args) {
-    slider([ACTION_THAW, name] + args)
+    slider(0, [ACTION_THAW, name] + args)
   }
 
   static SliderShell registry(int result, Collection<String> commands) {
@@ -327,9 +337,7 @@
    * @param cluster
    */
   static void setupCluster(String cluster) {
-    if (FUNTESTS_ENABLED) {
-      ensureClusterDestroyed(cluster)
-    }
+    ensureClusterDestroyed(cluster)
   }
 
   /**
@@ -338,9 +346,7 @@
    * @param name cluster name
    */
   static void teardown(String name) {
-    if (FUNTESTS_ENABLED) {
-      freezeForce(name)
-    }
+    freezeForce(name)
   }
 
   /**
@@ -508,11 +514,46 @@
     return status
   }
 
-  /**
-   * if tests are not enabled: skip them  
-   */
-  public static void assumeFunctionalTestsEnabled() {
-    assume(FUNTESTS_ENABLED, "Functional tests disabled")
+  protected void ensureApplicationIsUp(String clusterName) {
+    repeatUntilTrue(this.&isApplicationUp, 15, 1000 * 3, ['arg1': clusterName],
+      true, 'Application did not start, aborting test.')
+  }
+
+  protected boolean isApplicationUp(Map<String, String> args) {
+    String applicationName = args['arg1'];
+    return isApplicationInState("RUNNING", applicationName);
+  }
+
+  public static boolean isApplicationInState(String text, String applicationName) {
+    boolean exists = false
+    SliderShell shell = slider(0,
+      [
+        ACTION_LIST,
+        applicationName])
+    for (String str in shell.out) {
+      if (str.contains(text)) {
+        exists = true
+      }
+    }
+
+    return exists
+  }
+
+  protected void repeatUntilTrue(Closure c, int maxAttempts, int sleepDur, Map args,
+                                 boolean failIfUnsuccessful = false, String message = "") {
+    int attemptCount = 0
+    while (attemptCount < maxAttempts) {
+      if (c(args)) {
+        break
+      };
+      attemptCount++;
+
+      if (failIfUnsuccessful) {
+        assert attemptCount != maxAttempts, message
+      }
+
+      sleep(sleepDur)
+    }
   }
 
 }
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
index 9b63c22..1096dfa 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/FuntestProperties.groovy
@@ -40,7 +40,6 @@
   String KEY_SLIDER_TEST_NUM_WORKERS = "slider.test.cluster.size"
   int DEFAULT_SLIDER_NUM_WORKERS = 1
 
-  String KEY_SLIDER_TEST_ZK_HOSTS = "slider.test.zkhosts";
   String DEFAULT_SLIDER_ZK_HOSTS = "localhost:2181";
 
   /**
@@ -49,8 +48,6 @@
   String KEY_AM_RESTART_SLEEP_TIME = "slider.test.am.restart.time"
   int DEFAULT_AM_RESTART_SLEEP_TIME = 30000
 
-  String KEY_SLIDER_FUNTESTS_ENABLED = "slider.funtest.enabled"
-
   String CLIENT_CONFIG_FILENAME = SliderKeys.CLIENT_RESOURCE
   
   String ENV_CONF_DIR = "SLIDER_CONF_DIR"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
index b6f9a12..c8fdb97 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestBuildSetup.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/BuildSetupIT.groovy
@@ -32,5 +32,5 @@
  */
 @CompileStatic
 @Slf4j
-class TestBuildSetup extends AbstractTestBuildSetup {
+class BuildSetupIT extends AbstractTestBuildSetup {
 }
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
index b9d768a..9b8fe6f 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestClusterConnectivity.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/ClusterConnectivityIT.groovy
@@ -34,16 +34,11 @@
  * Test basic connectivity with the target cluster, including 
  * HDFS, YARN and ZK
  */
-class TestClusterConnectivity extends CommandTestBase {
+class ClusterConnectivityIT extends CommandTestBase {
 
 
   public static final int CONNECT_TIMEOUT = 2000
 
-  @BeforeClass
-  public static void setup() {
-    assumeFunctionalTestsEnabled()
-  }
-  
   @Test
   public void testFileSystemUp() throws Throwable {
 
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
index 7feb11d..eee75a8 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/TestSignCorrection.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/basic/SignCorrectionIT.groovy
@@ -26,7 +26,7 @@
  * This just verifies the two's complement sign correction that will
  * be applied after the return code is picked up from the shell
  */
-class TestSignCorrection {
+class SignCorrectionIT {
 
   @Test
   public void test255ToMinus1() throws Throwable {
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
similarity index 87%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
index ce7b497..20bac88 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestListCommand.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/ListCommandIT.groovy
@@ -26,13 +26,8 @@
 
 @CompileStatic
 @Slf4j
-public class TestListCommand extends CommandTestBase {
+public class ListCommandIT extends CommandTestBase {
 
-  @BeforeClass
-  public static void prepareCluster() {
-    assumeFunctionalTestsEnabled();
-  }
-  
   @Test
   public void testListAll() throws Throwable {
     assertSuccess(list(null))
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
similarity index 95%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
index 2d00130..bf742c9 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestSimpleCommands.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/SimpleCommandsIT.groovy
@@ -28,7 +28,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestSimpleCommands extends CommandTestBase {
+public class SimpleCommandsIT extends CommandTestBase {
 
   @Test
   public void testVersion() throws Throwable {
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
similarity index 94%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
index 7791c3c..39ae4dd 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/TestUnknownClusterOperations.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/commands/UnknownClusterOperationsIT.groovy
@@ -35,15 +35,10 @@
 @CompileStatic
 @Slf4j
 @org.junit.experimental.categories.Category(FunctionalTests)
-public class TestUnknownClusterOperations extends CommandTestBase {
+public class UnknownClusterOperationsIT extends CommandTestBase {
 
   public static final String UNKNOWN = "unknown_cluster"
 
-  @BeforeClass
-  public static void prepareCluster() {
-    assumeFunctionalTestsEnabled();
-  }
-
   @Test
   public void testFreezeUnknownCluster() throws Throwable {
     SliderShell shell = freeze(UNKNOWN)
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
similarity index 94%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index 0d643ca..6b0f2bd 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentClusterLifecycle.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -36,7 +36,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestAgentClusterLifecycle extends AgentCommandTestBase
+public class AgentClusterLifecycleIT extends AgentCommandTestBase
   implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
 
 
@@ -126,7 +126,7 @@
       log.info("Connected via Client {}", sliderClient.toString())
 
       //freeze
-      freeze(CLUSTER, [
+      freeze(0, CLUSTER, [
           ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
           ARG_MESSAGE, "freeze-in-test-cluster-lifecycle"
       ])
@@ -145,7 +145,7 @@
       exists(0, CLUSTER)
       describe " >>> Cluster is now thawed."
 
-      freeze(CLUSTER,
+      freeze(0, CLUSTER,
           [
               ARG_FORCE,
               ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
@@ -178,7 +178,12 @@
           StatusKeys.INFO_CONTAINERS_AM_RESTART)
       assert restarted != null
       assert Integer.parseInt(restarted) == 0
-      freeze(CLUSTER)
+      freeze(0, CLUSTER,
+          [
+              ARG_FORCE,
+              ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+              ARG_MESSAGE, "final-shutdown"
+          ])
 
       destroy(0, CLUSTER)
 
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
index 7804042..0ba48ba 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures2.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
@@ -31,7 +31,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestAgentFailures2 extends AgentCommandTestBase
+public class AgentFailures2IT extends AgentCommandTestBase
 implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
 
   private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index ea58d5f..a51c769 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAgentFailures.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -31,7 +31,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestAgentFailures extends AgentCommandTestBase
+public class AgentFailuresIT extends AgentCommandTestBase
 implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
 
   private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
index 6b0f678..00a876a 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestAppsThroughAgent.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
@@ -31,7 +31,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestAppsThroughAgent extends AgentCommandTestBase
+public class AppsThroughAgentIT extends AgentCommandTestBase
 implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
 
   private static String COMMAND_LOGGER = "COMMAND_LOGGER"
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
similarity index 97%
rename from slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy
rename to slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
index ead1601..f8caac5 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/TestClusterBuildDestroy.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/ClusterBuildDestroyIT.groovy
@@ -34,7 +34,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestClusterBuildDestroy extends AgentCommandTestBase
+public class ClusterBuildDestroyIT extends AgentCommandTestBase
     implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
 
 
diff --git a/slider-install/pom.xml b/slider-install/pom.xml
deleted file mode 100644
index b08895f..0000000
--- a/slider-install/pom.xml
+++ /dev/null
@@ -1,111 +0,0 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <artifactId>slider-install</artifactId>
-  <name>Slider Install</name>
-  <packaging>pom</packaging>
-  <description>
-    
-    Builds any RPMs and other install packaging for Slider.
-    This is downstream of the slider assembly and the app packages, so
-    that anything from these can be installed in the RPM.
-  </description>
-  <parent>
-    <groupId>org.apache.slider</groupId>
-    <artifactId>slider</artifactId>
-    <version>0.40</version>
-  </parent>
-
-  <properties>
-    <basedir>/usr/local/slider</basedir>
-    <confdir>${basedir}/conf</confdir>
-    <bindir>${basedir}/bin</bindir>
-  </properties>
-  
-  <build>
-    <plugins>
-      <!--read in a build.properties file if defined-->
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>properties-maven-plugin</artifactId>
-        <version>${maven.properties.version}</version>
-        <executions>
-          <execution>
-            <phase>initialize</phase>
-            <goals>
-              <goal>read-project-properties</goal>
-            </goals>
-            <configuration>
-              <quiet>true</quiet>
-              <files>
-                <file>build.properties</file>
-                <file>../build.properties</file>
-              </files>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-    </plugins>
-    
-    
-  </build>
-
-  <reporting>
-    <plugins>
- 
-
-
-    </plugins>
-  </reporting>
-
-  <dependencies>
-
-    <dependency>
-      <groupId>org.apache.slider</groupId>
-      <artifactId>slider-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <!--
-     needed to order the build and ensure the agent tar is found
-     the test scope ensures that it isn't copied into the lib dir
-     -->
-    <dependency>
-      <groupId>org.apache.slider</groupId>
-      <artifactId>slider-agent</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-      <type>tar.gz</type>
-    </dependency>
-
-    <dependency>
-      <groupId>com.beust</groupId>
-      <artifactId>jcommander</artifactId>
-    </dependency>
-
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
- 
-  </dependencies>
-
-
-</project>
diff --git a/slider-providers/accumulo/accumulo-funtests/pom.xml b/slider-providers/accumulo/accumulo-funtests/pom.xml
index 1b25b33..14388f4 100644
--- a/slider-providers/accumulo/accumulo-funtests/pom.xml
+++ b/slider-providers/accumulo/accumulo-funtests/pom.xml
@@ -27,7 +27,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../</relativePath>
   </parent>
 
@@ -59,14 +59,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
@@ -85,8 +77,17 @@
      <!-- functional test -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <version>${maven-surefire-plugin.version}</version>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>${maven-failsafe-plugin.version}</version>
+        <executions>
+          <execution>
+            <id>run-integration-tests</id>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
           <!--mvn process fork options-->
           <reuseForks>${test.reuseForks}</reuseForks>
@@ -109,12 +110,6 @@
             <slider.conf.dir>${slider.conf.dir}</slider.conf.dir>
             <slider.bin.dir>../../../slider-assembly/target/slider-${project.version}-all/slider-${project.version}</slider.bin.dir>
           </systemPropertyVariables>
-          <includes>
-            <include>**/Test*.java</include>
-          </includes>
-          <excludes>
-            <exclude>**/Test*$*.java</exclude>
-          </excludes>
         </configuration>
       </plugin>
  
@@ -148,6 +143,16 @@
     <dependency>
       <groupId>org.apache.slider</groupId>
       <artifactId>slider-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.slider</groupId>
+      <artifactId>slider-core</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
@@ -212,9 +217,22 @@
 
     <dependency>
       <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-fate</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
       <artifactId>accumulo-test</artifactId>
       <scope>test</scope>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <version>0.9.0</version>
+      <scope>test</scope>
+    </dependency>
     
   </dependencies>
 
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy
deleted file mode 100644
index 3d9abb7..0000000
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/TestStub.groovy
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.slider.providers.accumulo
-
-import org.junit.Test
-
-/**
- *  this is here to ensure there is always a test
- */
-class TestStub {
-
-  @Test
-  public void testStubTest() throws Throwable {
-
-  }
-}
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
similarity index 91%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
index 61366da..109bce9 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloBuildSetup.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloBuildSetupIT.groovy
@@ -22,12 +22,11 @@
 import org.apache.slider.funtest.abstracttests.AbstractTestBuildSetup
 import org.junit.Test
 
-class TestAccumuloBuildSetup extends AbstractTestBuildSetup {
+class AccumuloBuildSetupIT extends AbstractTestBuildSetup {
 
   @Test
   public void testAccumuloBuildsHavePathsDefined() throws Throwable {
     Configuration conf = loadSliderConf();
-    assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
 
     assumeBoolOption(conf, KEY_TEST_ACCUMULO_ENABLED, true)
 
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
similarity index 91%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
index 5573dd4..4ec5ff1 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestAccumuloCI.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCIIT.groovy
@@ -16,7 +16,6 @@
  */
 package org.apache.slider.providers.accumulo.funtest
 
-import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 
 import org.apache.accumulo.core.client.Connector
@@ -34,12 +33,8 @@
 import org.apache.slider.funtest.framework.FuntestProperties
 import org.apache.slider.funtest.framework.PortAssignments
 
-/**
- * 
- */
-@CompileStatic
 @Slf4j
-class TestAccumuloCI extends TestFunctionalAccumuloCluster {
+class AccumuloCIIT extends FunctionalAccumuloClusterIT {
   
   @Override
   String getClusterName() {
@@ -67,7 +62,6 @@
     String zookeepers = SLIDER_CONFIG.get(SliderXmlConfKeys.REGISTRY_ZK_QUORUM,
         FuntestProperties.DEFAULT_SLIDER_ZK_HOSTS)
     ZooKeeperInstance inst = new ZooKeeperInstance(currentUser + "-" + clustername, zookeepers)
-    PasswordToken passwd = new PasswordToken(getPassword())
     Connector conn = inst.getConnector("root", new PasswordToken(getPassword()))
     
     // Create the test table with some split points
@@ -83,7 +77,7 @@
     String[] ciOpts = ["-i", inst.getInstanceName(),
       "-z", zookeepers, "-u", "root",
       "-p", getPassword(), "--table", tableName,
-      "--num", Integer.toString(1000 * 1000 * 15 * getNumTservers()),
+      "--num", Integer.toString(1000 * 1000 * 4 * getNumTservers()),
       "--batchMemory", "100000000",
       "--batchLatency", "600000",
       "--batchThreads", "1"]
@@ -95,7 +89,7 @@
     Path verifyOutput = new Path("/user/" + currentUser + "/.slider/cluster/" + clustername + "/verify-output")
     assert !clusterFS.exists(verifyOutput)
     
-    YarnConfiguration verifyConf = new YarnConfiguration(CommandTestBase.SLIDER_CONFIG);
+    YarnConfiguration verifyConf = new YarnConfiguration(SLIDER_CONFIG);
 
         // Try to load the necessary classes for the Mappers to find them
     if (loadClassesForMapReduce(verifyConf)) {
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
index 1b5d8bf..f050793 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/AccumuloCommandTestBase.groovy
@@ -18,6 +18,13 @@
 
 package org.apache.slider.providers.accumulo.funtest
 
+import org.apache.hadoop.security.UserGroupInformation
+import org.apache.slider.common.SliderExitCodes
+import org.apache.slider.common.params.SliderActions
+import org.apache.slider.funtest.framework.FileUploader
+import org.apache.slider.providers.accumulo.AccumuloClientProvider
+import org.junit.BeforeClass
+
 import static SliderXMLConfKeysForTesting.KEY_TEST_ACCUMULO_APPCONF
 import static SliderXMLConfKeysForTesting.KEY_TEST_ACCUMULO_TAR
 import static org.apache.slider.api.ResourceKeys.YARN_MEMORY
@@ -47,10 +54,14 @@
 /**
  * Anything specific to accumulo tests
  */
-abstract class AccumuloCommandTestBase extends CommandTestBase {
+abstract class AccumuloCommandTestBase extends CommandTestBase
+  implements SliderExitCodes, SliderActions {
 
   public static final int ACCUMULO_LAUNCH_WAIT_TIME
   public static final boolean ACCUMULO_TESTS_ENABLED
+  public static final FileUploader uploader
+  public Path ACCUMULO_TAR
+  public Path ACCUMULO_CONF
 
   static {
     ACCUMULO_LAUNCH_WAIT_TIME = getTimeOptionMillis(SLIDER_CONFIG,
@@ -58,17 +69,16 @@
         1000 * DEFAULT_ACCUMULO_LAUNCH_TIME_SECONDS)
     ACCUMULO_TESTS_ENABLED =
         SLIDER_CONFIG.getBoolean(KEY_TEST_ACCUMULO_ENABLED, false)
+    uploader = new FileUploader(SLIDER_CONFIG, UserGroupInformation.currentUser)
   }
 
 
   public static void assumeAccumuloTestsEnabled() {
-    assumeFunctionalTestsEnabled()
     assume(ACCUMULO_TESTS_ENABLED, "Accumulo tests disabled")
   }
   
-  @Before
-  public void verifyPreconditions() {
-
+  @BeforeClass
+  public static void verifyPreconditions() {
     //if tests are not enabled: skip tests
     assumeAccumuloTestsEnabled()
     // but if they are -fail if the values are missing
@@ -76,6 +86,29 @@
     getRequiredConfOption(SLIDER_CONFIG, OPTION_HADOOP_HOME)
   }
 
+  @BeforeClass
+  public static void extendClasspath() {
+    addExtraJar(AccumuloClientProvider)
+  }
+
+  @Before
+  public void uploadFiles() {
+    File tar = new File(getRequiredConfOption(SLIDER_CONFIG,
+      KEY_TEST_ACCUMULO_TAR))
+    File conf = new File(getRequiredConfOption(SLIDER_CONFIG,
+      KEY_TEST_ACCUMULO_APPCONF))
+
+    //create the home dir or fail
+    Path home = uploader.mkHomeDir()
+
+    ACCUMULO_TAR = new Path(home, tar.getName())
+    ACCUMULO_CONF = new Path(home, "accumulo-conf")
+
+    // Upload the local accumulo tarball and conf directory to hdfs
+    uploader.copyIfOutOfDate(tar, ACCUMULO_TAR, false)
+    uploader.copyIfOutOfDate(conf, ACCUMULO_CONF, false)
+  }
+
   /**
    * Create an accumulo cluster
    *
@@ -102,12 +135,10 @@
     clusterOps[OPTION_HADOOP_HOME] = getRequiredConfOption(
         SLIDER_CONFIG,
         OPTION_HADOOP_HOME)
-    argsList << Arguments.ARG_IMAGE <<
-    getRequiredConfOption(SLIDER_CONFIG, KEY_TEST_ACCUMULO_TAR)
+    argsList << Arguments.ARG_IMAGE << ACCUMULO_TAR
 
-    argsList << Arguments.ARG_CONFDIR <<
-    getRequiredConfOption(SLIDER_CONFIG, KEY_TEST_ACCUMULO_APPCONF)
-    
+    argsList << Arguments.ARG_CONFDIR << ACCUMULO_CONF
+
     argsList << Arguments.ARG_OPTION << AccumuloKeys.OPTION_ACCUMULO_PASSWORD << password
 
     argsList << ARG_RES_COMP_OPT << ROLE_MASTER <<
@@ -125,7 +156,7 @@
                              blockUntilRunning,
                              clusterOps)
   }
-                                         
+
   public boolean loadClassesForMapReduce(Configuration conf) {
     String[] neededClasses = [AccumuloInputFormat.class.getName(), TException.class.getName(), ZooStore.class.getName(), Tracer.class.getName()]
     String[] neededJars = ["accumulo-core.jar", "libthrift.jar", "accumulo-fate.jar", "accumulo-trace.jar"]
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
similarity index 96%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
index 06fe21c..ca7cc65 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloCluster.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloClusterIT.groovy
@@ -18,7 +18,6 @@
 
 import static org.apache.slider.providers.accumulo.AccumuloConfigFileOptions.*
 import static org.apache.slider.providers.accumulo.AccumuloKeys.*
-import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 
 import org.apache.slider.common.SliderExitCodes
@@ -33,9 +32,8 @@
 /**
  * 
  */
-@CompileStatic
 @Slf4j
-class TestFunctionalAccumuloCluster extends AccumuloCommandTestBase
+class FunctionalAccumuloClusterIT extends AccumuloCommandTestBase
     implements FuntestProperties, Arguments, SliderExitCodes {
 
       
@@ -108,9 +106,10 @@
         extraArgs,
         true,
         clusterOps,
-        "256",
+        "128",
         getPassword()
         )
+    ensureApplicationIsUp(getClusterName())
 
     //get a slider client against the cluster
     SliderClient sliderClient = bondToCluster(SLIDER_CONFIG, getClusterName())
diff --git a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
similarity index 92%
rename from slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy
rename to slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
index 4f07b40..33910d4 100644
--- a/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/TestFunctionalAccumuloM1T1GC1Mon1.groovy
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/groovy/org/apache/slider/providers/accumulo/funtest/FunctionalAccumuloM1T1GC1Mon1IT.groovy
@@ -18,15 +18,13 @@
 
 package org.apache.slider.providers.accumulo.funtest
 
-import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
 
-@CompileStatic
 @Slf4j
-public class TestFunctionalAccumuloM1T1GC1Mon1 extends TestFunctionalAccumuloCluster {
+public class FunctionalAccumuloM1T1GC1Mon1IT extends FunctionalAccumuloClusterIT {
 
   @Override
   public String getClusterName() {
diff --git a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java b/slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
similarity index 88%
rename from slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
rename to slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
index eefccbb..1a948a2 100644
--- a/slider-funtest/src/main/java/org/apache/slider/funtest/accumulo/StubToForceGroovySrcToCompile.java
+++ b/slider-providers/accumulo/accumulo-funtests/src/test/java/org/apache/slider/providers/accumulo/funtest/StubToForceGroovyTestsToCompile.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.slider.funtest.accumulo;
+package org.apache.slider.providers.accumulo.funtest;
 
-class StubToForceGroovySrcToCompile {
+class StubToForceGroovyTestsToCompile {
 }
diff --git a/slider-providers/accumulo/slider-accumulo-provider/pom.xml b/slider-providers/accumulo/slider-accumulo-provider/pom.xml
index cabea00..bb5434a 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/pom.xml
+++ b/slider-providers/accumulo/slider-accumulo-provider/pom.xml
@@ -28,7 +28,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../</relativePath>
   </parent>
 
@@ -68,14 +68,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
index db99360..7f99573 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloClientProvider.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.common.SliderXmlConfKeys;
 import org.apache.slider.api.OptionKeys;
@@ -135,7 +136,7 @@
 
     propagateClientFSBinding(sitexml);
     setDatabasePath(sitexml,
-                    globalInstanceOptions.getMandatoryOption(OptionKeys.INTERNAL_DATA_DIR_PATH));
+                    globalInstanceOptions.getMandatoryOption(InternalKeys.INTERNAL_DATA_DIR_PATH));
 
 
     String quorum =
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
index c511efb..b8f4c00 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/main/java/org/apache/slider/providers/accumulo/AccumuloProviderService.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.OptionKeys;
@@ -161,7 +162,7 @@
     //Add binaries
     //now add the image if it was set
     String imageURI = instanceDefinition.getInternalOperations()
-                                        .get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+                                        .get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
     fileSystem.maybeAddImagePath(launcher.getLocalResources(), imageURI);
 
     CommandLineBuilder commandLine = new CommandLineBuilder();
@@ -169,6 +170,8 @@
     String heap = "-Xmx" + appComponent.getOption(RoleKeys.JVM_HEAP, DEFAULT_JVM_HEAP);
     String opt = "ACCUMULO_OTHER_OPTS";
     if (SliderUtils.isSet(heap)) {
+/* JDK7
+
       switch (role) {
         case AccumuloKeys.ROLE_MASTER:
           opt = "ACCUMULO_MASTER_OPTS";
@@ -183,6 +186,16 @@
           opt = "ACCUMULO_GC_OPTS";
           break;
       }
+*/
+      if (AccumuloKeys.ROLE_MASTER.equals(role)) {
+        opt = "ACCUMULO_MASTER_OPTS";
+      } else if (AccumuloKeys.ROLE_TABLET.equals(role)) {
+        opt = "ACCUMULO_TSERVER_OPTS";
+      } else if (AccumuloKeys.ROLE_MONITOR.equals(role)) {
+        opt = "ACCUMULO_MONITOR_OPTS";
+      } else if (AccumuloKeys.ROLE_GARBAGE_COLLECTOR.equals(role)) {
+        opt = "ACCUMULO_GC_OPTS";
+      }
       launcher.setEnv(opt, heap);
     }
 
@@ -238,7 +251,7 @@
 
 
     String accumuloScript = AccumuloClientProvider.buildScriptBinPath(instance);
-    List<String> launchSequence = new ArrayList<>(8);
+    List<String> launchSequence = new ArrayList<String>(8);
     launchSequence.add(0, accumuloScript);
     Collections.addAll(launchSequence, commands);
     return launchSequence;
@@ -333,11 +346,12 @@
     //callback to AM to trigger cluster review is set up to happen after
     //the init/verify action has succeeded
     int delay = internalOperations.getGlobalOptions().getOptionInt(
-        OptionKeys.INTERNAL_CONTAINER_STARTUP_DELAY,
-        OptionKeys.DEFAULT_CONTAINER_STARTUP_DELAY);
+        InternalKeys.INTERNAL_CONTAINER_STARTUP_DELAY,
+        InternalKeys.DEFAULT_INTERNAL_CONTAINER_STARTUP_DELAY);
     ProviderCompletedCallable completedCallable =
         new ProviderCompletedCallable(execInProgress, null);
-    Service notifier = new WorkflowCallbackService<>(
+    // JDK7
+    Service notifier = new WorkflowCallbackService(
         "accumulo notifier",
         completedCallable,
         delay,
@@ -364,7 +378,7 @@
     String dataDir = cd.getInternalOperations()
                                .getGlobalOptions()
                                .getMandatoryOption(
-                                 OptionKeys.INTERNAL_DATA_DIR_PATH);
+                                 InternalKeys.INTERNAL_DATA_DIR_PATH);
     Path accumuloInited = new Path(dataDir, INSTANCE_ID);
     FileSystem fs2 = FileSystem.get(accumuloInited.toUri(), getConf());
     return fs2.exists(accumuloInited);
@@ -388,7 +402,7 @@
   @Override
   public Map<String, String> buildProviderStatus() {
     
-    Map<String,String> status = new HashMap<>();
+    Map<String,String> status = new HashMap<String, String>();
     
     
     return status;
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
index 1e2e27f..bf35207 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/AccumuloTestBase.groovy
@@ -195,7 +195,7 @@
       String clustername, List<Map<String, Integer>> plan) {
     int planCount = plan.size()
     assert planCount > 0
-    createMiniCluster(clustername, getConfiguration(),
+    createMiniCluster(clustername, configuration,
         1,
         true);
     //now launch the cluster
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
index 348ccd4..2333fdf 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccCorrectInstanceName.groovy
@@ -39,11 +39,11 @@
 
   @Test
   public void testAccM1T1GC1Mon1() throws Throwable {
-    String clustername = "test_acc_m1t1gc1mon1"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+    String clustername = createMiniCluster( "",
+        configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
index 166c6c4..18c00f5 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets133Mgr113.groovy
@@ -29,8 +29,7 @@
 
   @Test
   public void testAccFlexTablets133Mgr113() throws Throwable {
-    ClusterDescription cd = flexAccClusterTestRun(
-        "test_acc_flex_tablets133mgr113",
+    ClusterDescription cd = flexAccClusterTestRun(createClusterName(),
         [
             [
                 (AccumuloKeys.ROLE_MASTER) : 1,
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
index b084cdc..2e8f3c9 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFlexTablets1to3.groovy
@@ -44,7 +44,7 @@
         (AccumuloKeys.ROLE_GARBAGE_COLLECTOR): 1]
 
     ClusterDescription cd = flexAccClusterTestRun(
-        "test_acc_flex_tablets1to3",
+        createClusterName(),
         [plan1, plan2]
     )
 
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
index 143974f..6da00fb 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccFreezeThaw.groovy
@@ -34,11 +34,11 @@
 
   @Test
   public void testAccFreezeThaw() throws Throwable {
-    String clustername = "test_acc_freeze_thaw"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+    String clustername = createMiniCluster("",
+        configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
index 2f744bb..8d5890c 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveHDFSArchive.groovy
@@ -36,11 +36,11 @@
 
   @Test
   public void testAccLiveHDFSArchive() throws Throwable {
-    String clustername = "test_acc_live_hdfs_archive"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, true)
     describe(" Create an accumulo cluster from an archive");
 
     enableTestRunAgainstUploadedArchive();
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
index 98b8b44..df0bbd9 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccLiveLocalArchive.groovy
@@ -36,11 +36,11 @@
 
   @Test
   public void testAccLiveLocalArchive() throws Throwable {
-    String clustername = "test_acc_live_local_archive"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster from an archive");
 
     //image mode
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
index 7074294..d94eb36 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM1T1GC1Mon1.groovy
@@ -35,11 +35,10 @@
 
   @Test
   public void testAccM1T1GC1Mon1() throws Throwable {
-    String clustername = "test_acc_m1t1gc1mon1"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+    String clustername = createMiniCluster( "", configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
index 253192b..2ed50f1 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccM2T2GC1Mon1.groovy
@@ -36,12 +36,12 @@
 
   @Test
   public void testAccM1T1GC1Mon1() throws Throwable {
-    String clustername = "test_acc_m2t2gc1mon1"
     int master = 2
     int tablets = 2
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
index df40df7..4596b12 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestAccumuloAMWebApp.groovy
@@ -38,11 +38,11 @@
 
   @Test
   public void testAccumuloAMWebApp() throws Throwable {
-    String clustername = "test_accumulo_am_webapp"
     int tablets = 1
     int monitor = 1
     int gc = 1
-    createMiniCluster(clustername, getConfiguration(), 1, 1, 1, true, false)
+    String clustername = createMiniCluster( "",
+        configuration, 1, 1, 1, true, false)
     describe(" Create an accumulo cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
index d86a158..dca0c6b 100644
--- a/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
+++ b/slider-providers/accumulo/slider-accumulo-provider/src/test/groovy/org/apache/slider/providers/accumulo/live/TestInvalidMonitorAddress.groovy
@@ -38,8 +38,7 @@
 
   @Test
   public void testInvalidMonitorAddress() throws Throwable {
-    String clustername = "test_invalid_monitor_address"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that bad Java heap options are picked up"
     
diff --git a/slider-providers/hbase/hbase-funtests/pom.xml b/slider-providers/hbase/hbase-funtests/pom.xml
index f5a155b..857ef03 100644
--- a/slider-providers/hbase/hbase-funtests/pom.xml
+++ b/slider-providers/hbase/hbase-funtests/pom.xml
@@ -27,7 +27,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../</relativePath>
   </parent>
 
@@ -59,14 +59,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
@@ -85,8 +77,17 @@
      <!-- functional test -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <version>${maven-surefire-plugin.version}</version>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>${maven-failsafe-plugin.version}</version>
+        <executions>
+          <execution>
+            <id>run-integration-tests</id>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
           
           <!--mvn process fork options-->
@@ -107,15 +108,9 @@
             <java.security.krb5.realm>${slider.test.java.security.krb5.realm}</java.security.krb5.realm>
             <java.security.krb5.kdc>${slider.test.java.security.krb5.kdc}</java.security.krb5.kdc>
             <!-- this property must be supplied-->
-            <slider.conf.dir>../../../src/test/clusters/offline/slider</slider.conf.dir>
+            <slider.conf.dir>${slider.conf.dir}</slider.conf.dir>
             <slider.bin.dir>../../../slider-assembly/target/slider-${project.version}-all/slider-${project.version}</slider.bin.dir>
           </systemPropertyVariables>
-          <includes>
-            <include>**/Test*.java</include>
-          </includes>
-          <excludes>
-            <exclude>**/Test*$*.java</exclude>
-          </excludes>
         </configuration>
       </plugin>
  
@@ -145,6 +140,11 @@
     <dependency>
       <groupId>org.apache.slider</groupId>
       <artifactId>slider-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.slider</groupId>
+      <artifactId>slider-core</artifactId>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
index 9385cc9..1bef7d3 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/DemoHBaseCluster.groovy
@@ -22,7 +22,7 @@
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.client.SliderClient
 
-class DemoHBaseCluster extends TestFunctionalHBaseCluster {
+class DemoHBaseCluster extends FunctionalHBaseClusterIT {
 
 
   @Override
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
similarity index 98%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
index 3415b35..84e55f7 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestFunctionalHBaseCluster.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/FunctionalHBaseClusterIT.groovy
@@ -49,7 +49,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestFunctionalHBaseCluster extends HBaseCommandTestBase
+public class FunctionalHBaseClusterIT extends HBaseCommandTestBase
     implements FuntestProperties, Arguments, SliderExitCodes {
 
 
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
index c8f3be3..0dcffde 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseBuildSetup.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseBuildSetupIT.groovy
@@ -22,12 +22,11 @@
 import org.apache.slider.funtest.abstracttests.AbstractTestBuildSetup
 import org.junit.Test
 
-class TestHBaseBuildSetup extends AbstractTestBuildSetup {
+class HBaseBuildSetupIT extends AbstractTestBuildSetup {
 
   @Test
   public void testHBaseBuildsHavePathsDefined() throws Throwable {
     Configuration conf = loadSliderConf();
-    assumeBoolOption(conf, KEY_SLIDER_FUNTESTS_ENABLED, true)
 
     assumeBoolOption(conf, KEY_TEST_HBASE_ENABLED, true)
 
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
similarity index 95%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
index 3c9b8ed..3a44e30 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterBuildDestroy.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterBuildDestroyIT.groovy
@@ -34,7 +34,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestHBaseClusterBuildDestroy extends HBaseCommandTestBase
+public class HBaseClusterBuildDestroyIT extends HBaseCommandTestBase
     implements FuntestProperties, Arguments {
 
 
@@ -43,7 +43,6 @@
 
   @BeforeClass
   public static void prepareCluster() {
-    assumeFunctionalTestsEnabled();
     setupCluster(CLUSTER)
   }
 
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
index 01c7131..63b5fb6 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseClusterLifecycle.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseClusterLifecycleIT.groovy
@@ -34,7 +34,7 @@
 
 @CompileStatic
 @Slf4j
-public class TestHBaseClusterLifecycle extends HBaseCommandTestBase
+public class HBaseClusterLifecycleIT extends HBaseCommandTestBase
     implements FuntestProperties, Arguments, SliderExitCodes {
 
 
@@ -118,11 +118,13 @@
       log.info("Connected via Client {}", sliderClient.toString())
 
       //freeze
-      freeze(CLUSTER, [
+      def frozen = freeze(0, CLUSTER, [
           ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
-          ARG_MESSAGE, "freeze-in-test cluster lifecycle"
+          ARG_MESSAGE, "freeze-in-test-cluster-lifecycle"
       ])
+      frozen.assertExitCode(0)
 
+//      sleep(FREEZE_WAIT_TIME)
       //cluster exists if you don't want it to be live
       exists(0, CLUSTER, false)
       // condition returns false if it is required to be live
@@ -136,7 +138,7 @@
               ARG_WAIT, Integer.toString(THAW_WAIT_TIME),
           ])
       exists(0, CLUSTER)
-      freeze(CLUSTER,
+      freeze(0, CLUSTER,
           [
               ARG_FORCE,
               ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
@@ -165,7 +167,13 @@
           StatusKeys.INFO_CONTAINERS_AM_RESTART)
       assert restarted != null
       assert Integer.parseInt(restarted) == 0
-      freeze(CLUSTER)
+      freeze(0, CLUSTER,
+          [
+              ARG_FORCE,
+              ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
+              ARG_MESSAGE, "teardown-freeze"
+          ])
+      
 
       destroy(0, CLUSTER)
 
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
index 8bad590..caaab04 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseCommandTestBase.groovy
@@ -52,7 +52,6 @@
 
   @BeforeClass
   public static void extendClasspath() {
-    assumeFunctionalTestsEnabled()
     addExtraJar(HBaseClientProvider)
   }
 
@@ -65,7 +64,6 @@
 
 
   public void assumeHBaseTestsEnabled() {
-    assumeFunctionalTestsEnabled()
     assume(HBASE_TESTS_ENABLED, "HBase tests disabled")
   }
 
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
similarity index 91%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
index 4bf18f6..21a7494 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseIntegration.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseIntegrationIT.groovy
@@ -24,14 +24,17 @@
 import org.apache.hadoop.util.ToolRunner
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.client.SliderClient
-import org.apache.slider.providers.hbase.HBaseConfigFileOptions;
+import org.apache.slider.providers.hbase.HBaseConfigFileOptions
+import org.junit.Ignore;
 
 /* Runs IntegrationTestIngest on cluster
  *
  * Note: this test runs for about 20 minutes
  * please set slider.test.timeout.seconds accordingly
  */
-class TestHBaseIntegration extends TestFunctionalHBaseCluster {
+
+@Ignore("appears localhost only")
+class HBaseIntegrationIT extends FunctionalHBaseClusterIT {
 
   @Override
   String getClusterName() {
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
similarity index 90%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
index 3d6c46c..61bcc70 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseLoad.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseLoadIT.groovy
@@ -23,8 +23,10 @@
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.client.SliderClient
 import org.apache.slider.providers.hbase.HBaseConfigFileOptions
+import org.junit.Assume
+import org.junit.Ignore
 
-class TestHBaseLoad extends TestFunctionalHBaseCluster {
+class HBaseLoadIT extends FunctionalHBaseClusterIT {
 
   @Override
   String getClusterName() {
@@ -32,6 +34,11 @@
   }
 
   @Override
+  void testHBaseCreateCluster() throws Throwable {
+    super.testHBaseCreateCluster()
+  }
+
+  @Override
   void clusterOperations(
       String clustername,
       SliderClient sliderClient,
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
similarity index 98%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
index fab73b3..cd87fab 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestHBaseNodeFailure.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/HBaseNodeFailureIT.groovy
@@ -27,7 +27,7 @@
 import org.apache.slider.providers.hbase.HBaseKeys
 import org.apache.slider.providers.hbase.HBaseTestUtils
 
-class TestHBaseNodeFailure extends TestFunctionalHBaseCluster {
+class HBaseNodeFailureIT extends FunctionalHBaseClusterIT {
 
 
   public static final int RESTART_SLEEP_TIME = 5000
diff --git a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
similarity index 92%
rename from slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy
rename to slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
index 128e087..d1b5c55 100644
--- a/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/TestImages.groovy
+++ b/slider-providers/hbase/hbase-funtests/src/test/groovy/org/apache/slider/providers/hbase/funtest/ImagesIT.groovy
@@ -26,12 +26,11 @@
 import org.junit.Before
 import org.junit.Test
 
-class TestImages extends CommandTestBase implements FuntestProperties {
+class ImagesIT extends CommandTestBase implements FuntestProperties {
 
 
   @Before
   public void verifyPreconditions() {
-    assumeBoolOption(SLIDER_CONFIG, KEY_SLIDER_FUNTESTS_ENABLED, true)
     assumeBoolOption(SLIDER_CONFIG, KEY_TEST_HBASE_ENABLED, true)
   }
   
diff --git a/slider-providers/hbase/slider-hbase-provider/pom.xml b/slider-providers/hbase/slider-hbase-provider/pom.xml
index 381a1b6..b93ce14 100644
--- a/slider-providers/hbase/slider-hbase-provider/pom.xml
+++ b/slider-providers/hbase/slider-hbase-provider/pom.xml
@@ -29,7 +29,7 @@
   <parent>
     <groupId>org.apache.slider</groupId>
     <artifactId>slider</artifactId>
-    <version>0.40</version>
+    <version>0.50.0-incubating</version>
     <relativePath>../../../</relativePath>
   </parent>
 
@@ -47,14 +47,6 @@
       
       <plugin>
         <artifactId>maven-compiler-plugin</artifactId>
-        <version>${maven-compiler-plugin.version}</version>
-        <configuration>
-          <compilerId>groovy-eclipse-compiler</compilerId>
-          <!-- set verbose to be true if you want lots of uninteresting messages -->
-          <!-- <verbose>true</verbose> -->
-          <source>${project.java.src.version}</source>
-          <target>${project.java.src.version}</target>
-        </configuration>
         <dependencies>
           <dependency>
             <groupId>org.codehaus.groovy</groupId>
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
index c40c5f2..9ad872f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseClientProvider.java
@@ -21,6 +21,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.common.SliderXmlConfKeys;
 import org.apache.slider.api.OptionKeys;
@@ -107,7 +108,7 @@
 
     MapOperations worker = appconf.getMandatoryComponent(HBaseKeys.ROLE_WORKER);
     
-    Map<String, String> sitexml = new HashMap<>();
+    Map<String, String> sitexml = new HashMap<String, String>();
 
     //map all cluster-wide site. options
     providerUtils.propagateSiteOptions(globalAppOptions, sitexml);
@@ -122,7 +123,7 @@
 
     sitexml.put(KEY_HBASE_ROOTDIR,
         globalInstanceOptions.getMandatoryOption(
-            OptionKeys.INTERNAL_DATA_DIR_PATH)
+            InternalKeys.INTERNAL_DATA_DIR_PATH)
     );
     providerUtils.propagateOption(globalAppOptions, OptionKeys.ZOOKEEPER_PATH,
                                   sitexml, KEY_ZNODE_PARENT);
@@ -209,7 +210,7 @@
     }
   }
 
-  private static Set<String> knownRoleNames = new HashSet<>();
+  private static Set<String> knownRoleNames = new HashSet<String>();
   static {
     List<ProviderRole> roles = HBaseRoles.getRoles();
     knownRoleNames.add(SliderKeys.COMPONENT_AM);
@@ -239,6 +240,12 @@
     providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_MASTER,
                                     0, -1);
 
+    providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_REST_GATEWAY,
+      0, -1);
+    providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_THRIFT_GATEWAY,
+      0, -1);
+    providerUtils.validateNodeCount(instanceDefinition, HBaseKeys.ROLE_THRIFT2_GATEWAY,
+      0, -1);
   }
 
   @Override
@@ -256,7 +263,7 @@
 
     // add any and all dependency files
     Map<String, LocalResource> providerResources =
-        new HashMap<>();
+        new HashMap<String, LocalResource>();
 
     ProviderUtils.addProviderJar(providerResources,
         this,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
index 1d6ca70..2a20438 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseKeys.java
@@ -26,9 +26,23 @@
   
   String ROLE_MASTER = MASTER;
 
+  String ROLE_REST_GATEWAY = "REST";
+  
+  String ROLE_THRIFT_GATEWAY = "THRIFT";
+  
+  String ROLE_THRIFT2_GATEWAY = "THRIFT2";
+  
   /** {@value */
   String REGION_SERVER = "regionserver";
 
+  /** {@value */
+  String REST_GATEWAY = "rest";
+
+  /** {@value */
+  String THRIFT_GATEWAY = "thrift";
+  /** {@value */
+  String THRIFT2_GATEWAY = "thrift2";
+  /**
   /**
    * What is the command for hbase to print a version: {@value}
    */
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
index f9a5628..82e535f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
@@ -22,9 +22,9 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.slider.api.InternalKeys;
 import org.apache.slider.common.SliderKeys;
 import org.apache.slider.api.ClusterDescription;
-import org.apache.slider.api.OptionKeys;
 import org.apache.slider.api.RoleKeys;
 import org.apache.slider.api.StatusKeys;
 import org.apache.slider.core.conf.AggregateConf;
@@ -68,11 +68,8 @@
  * This class implements the server-side aspects
  * of an HBase Cluster
  */
-public class HBaseProviderService extends AbstractProviderService implements
-                                                                  ProviderCore,
-                                                                  HBaseKeys,
-    SliderKeys,
-    AgentRestOperations{
+public class HBaseProviderService extends AbstractProviderService 
+    implements ProviderCore, HBaseKeys, SliderKeys, AgentRestOperations{
 
   protected static final Logger log =
     LoggerFactory.getLogger(HBaseProviderService.class);
@@ -109,8 +106,8 @@
    * @param instanceDefinition the instance definition to validate
    */
   @Override // Client and Server
-  public void validateInstanceDefinition(AggregateConf instanceDefinition) throws
-      SliderException {
+  public void validateInstanceDefinition(AggregateConf instanceDefinition) 
+      throws SliderException {
     clientProvider.validateInstanceDefinition(instanceDefinition);
   }
 
@@ -152,7 +149,7 @@
     //Add binaries
     //now add the image if it was set
     String imageURI = instanceDefinition.getInternalOperations()
-                  .get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH);
+                  .get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH);
     coreFS.maybeAddImagePath(launcher.getLocalResources(), imageURI);
 
     CommandLineBuilder cli = new CommandLineBuilder();
@@ -180,6 +177,8 @@
     String roleCommand;
     String logfile;
     //now look at the role
+
+/* JDK7
     switch (role) {
       case ROLE_WORKER:
         //role is region server
@@ -191,10 +190,52 @@
 
         logfile = "/master.txt";
         break;
+      case ROLE_REST_GATEWAY:
+        roleCommand = REST_GATEWAY;
+
+        logfile = "/rest-gateway.txt";
+        break;
+      case ROLE_THRIFT_GATEWAY:
+        roleCommand = THRIFT_GATEWAY;
+
+        logfile = "/thrift-gateway.txt";
+        break;
+      case ROLE_THRIFT2_GATEWAY:
+        roleCommand = THRIFT2_GATEWAY;
+
+        logfile = "/thrift2-gateway.txt";
+        break;
       default:
         throw new SliderInternalStateException("Cannot start role %s", role);
     }
 
+*/
+    if (ROLE_WORKER.equals(role)) {
+      //role is region server
+      roleCommand = REGION_SERVER;
+      logfile = "/region-server.txt";
+      
+    } else if (ROLE_MASTER.equals(role)) {
+      roleCommand = MASTER;
+      logfile = "/master.txt";
+
+    } else if (ROLE_REST_GATEWAY.equals(role)) {
+      roleCommand = REST_GATEWAY;
+      logfile = "/rest-gateway.txt";
+
+    } else if (ROLE_THRIFT_GATEWAY.equals(role)) {
+      roleCommand = THRIFT_GATEWAY;
+      logfile = "/thrift-gateway.txt";
+
+    } else if (ROLE_THRIFT2_GATEWAY.equals(role)) {
+      roleCommand = THRIFT2_GATEWAY;
+      logfile = "/thrift2-gateway.txt";
+    }
+    
+    else {
+      throw new SliderInternalStateException("Cannot start role %s", role);
+    }
+    
     cli.add(roleCommand);
     cli.add(ACTION_START);
     //log details
@@ -304,7 +345,7 @@
    * @return the provider status - map of entries to add to the info section
    */
   public Map<String, String> buildProviderStatus() {
-    Map<String, String> stats = new HashMap<>();
+    Map<String, String> stats = new HashMap<String, String>();
 
     return stats;
   }
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
index 01776f7..b2825b8 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseRoles.java
@@ -37,12 +37,20 @@
 
   public static final int KEY_MASTER = SliderKeys.ROLE_AM_PRIORITY_INDEX + 2;
 
+  public static final int KEY_REST_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 3;
+
+  public static final int KEY_THRIFT_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 4;
+
+  public static final int KEY_THRIFT2_GATEWAY = SliderKeys.ROLE_AM_PRIORITY_INDEX + 5;
   /**
    * Initialize role list
    */
   static {
     ROLES.add(new ProviderRole(HBaseKeys.ROLE_WORKER, KEY_WORKER));
     ROLES.add(new ProviderRole(HBaseKeys.ROLE_MASTER, KEY_MASTER));
+    ROLES.add(new ProviderRole(HBaseKeys.ROLE_REST_GATEWAY, KEY_REST_GATEWAY));
+    ROLES.add(new ProviderRole(HBaseKeys.ROLE_THRIFT_GATEWAY, KEY_THRIFT_GATEWAY));
+    ROLES.add(new ProviderRole(HBaseKeys.ROLE_THRIFT_GATEWAY, KEY_THRIFT2_GATEWAY));
   }
 
 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
index 3f49771..6a69e17 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/HBaseMiniClusterTestBase.groovy
@@ -26,7 +26,6 @@
 import org.apache.slider.api.ClusterDescription
 import org.apache.slider.api.ClusterNode
 import org.apache.slider.api.ResourceKeys
-import org.apache.slider.providers.hbase.HBaseKeys
 import org.apache.slider.client.SliderClient
 import org.apache.slider.core.main.ServiceLauncher
 import org.apache.slider.providers.hbase.HBaseTestUtils
@@ -214,9 +213,7 @@
         (ROLE_MASTER): masters,
         (ROLE_WORKER): workers,
     ];
-    extraArgs << ARG_RES_COMP_OPT << ROLE_MASTER << ResourceKeys.YARN_MEMORY << YRAM
-    extraArgs << ARG_RES_COMP_OPT << ROLE_WORKER << ResourceKeys.YARN_MEMORY << YRAM
-    extraArgs << ARG_PROVIDER << PROVIDER_HBASE;
+    hbaseArgs(extraArgs)
     
     return createCluster(clustername,
         roles,
@@ -226,6 +223,15 @@
         [:])
   }
 
+  public List<String> hbaseArgs(List<String> extraArgs) {
+    extraArgs << ARG_RES_COMP_OPT << ROLE_MASTER << ResourceKeys.YARN_MEMORY <<
+    YRAM
+    extraArgs << ARG_RES_COMP_OPT << ROLE_WORKER << ResourceKeys.YARN_MEMORY <<
+    YRAM
+    extraArgs << ARG_PROVIDER << PROVIDER_HBASE;
+    return extraArgs;
+  }
+
   /**
    * Create an AM without a master
    * @param clustername AM name
@@ -241,9 +247,7 @@
     ];
     return createCluster(clustername,
         roles,
-        [
-            ARG_PROVIDER, PROVIDER_HBASE
-        ],
+        hbaseArgs([]),
         deleteExistingData,
         blockUntilRunning,
         [:])
@@ -278,62 +282,94 @@
       int masters,
       int masterFlexTarget,
       int workers,
-      int flexTarget,
+      int workerFlexTarget,
       boolean testHBaseAfter) {
-    createMiniCluster(clustername, configuration,
-                      1,
-                      true);
+    clustername = buildClustername(clustername);
+    SliderClient sliderClient = startHBaseCluster(clustername, masters, workers)
+
+    //now flex
+    return flexCluster(
+        sliderClient,
+        clustername,
+        masterFlexTarget,
+        workerFlexTarget,
+        testHBaseAfter)
+
+  }
+
+  public SliderClient startHBaseCluster(
+      String clustername,
+      int masters,
+      int workers) {
+    clustername = createMiniCluster(clustername, configuration,
+        1,
+        true);
     //now launch the cluster
     SliderClient sliderClient;
     ServiceLauncher<SliderClient> launcher = createCluster(clustername,
-           [
-               (ROLE_MASTER): masters,
-               (ROLE_WORKER): workers,
-           ],
-           [
-               ARG_RES_COMP_OPT , ROLE_MASTER, ResourceKeys.YARN_MEMORY, YRAM,
-               ARG_RES_COMP_OPT , ROLE_WORKER, ResourceKeys.YARN_MEMORY, YRAM,
-               ARG_PROVIDER , PROVIDER_HBASE
-           ],
-           true,
-           true,
-           [:]);
+        [
+            (ROLE_MASTER): masters,
+            (ROLE_WORKER): workers,
+        ],
+        hbaseArgs([]),
+        true,
+        true,
+        [:]);
     sliderClient = launcher.service;
-    try {
-      basicHBaseClusterStartupSequence(sliderClient);
 
-      describe("Waiting for initial worker count of $workers");
+    basicHBaseClusterStartupSequence(sliderClient);
 
-      //verify the #of roles is as expected
-      //get the hbase status
-      waitForWorkerInstanceCount(sliderClient, workers, hbaseClusterStartupToLiveTime);
-      waitForSliderMasterCount(sliderClient, masters, hbaseClusterStartupToLiveTime);
+    describe("Waiting for initial worker count of $workers");
 
-      log.info("Slider worker count at $workers, waiting for region servers to match");
-      waitForHBaseRegionServerCount(sliderClient, clustername, workers, hbaseClusterStartupToLiveTime);
+    //verify the #of roles is as expected
+    //get the hbase status
+    waitForWorkerInstanceCount(
+        sliderClient,
+        workers,
+        hbaseClusterStartupToLiveTime);
+    waitForSliderMasterCount(
+        sliderClient,
+        masters,
+        hbaseClusterStartupToLiveTime);
 
-      //now flex
-      describe("Flexing  masters:$masters -> $masterFlexTarget ; workers $workers -> $flexTarget");
-      boolean flexed;
-      flexed = 0 == sliderClient.flex(clustername,
-          [
-              (ROLE_WORKER): flexTarget,
-              (ROLE_MASTER): masterFlexTarget
-          ]
-      );
-      waitForWorkerInstanceCount(sliderClient, flexTarget, hbaseClusterStartupToLiveTime);
-      waitForSliderMasterCount(sliderClient, masterFlexTarget,
-                             hbaseClusterStartupToLiveTime);
+    log.info(
+        "Slider worker count at $workers, waiting for region servers to match");
+    waitForHBaseRegionServerCount(
+        sliderClient,
+        clustername,
+        workers,
+        hbaseClusterStartupToLiveTime);
+    sliderClient
+  }
 
-      if (testHBaseAfter) {
-        waitForHBaseRegionServerCount(sliderClient, clustername, flexTarget,
-                                      hbaseClusterStartupToLiveTime);
-      }
-      return flexed;
-    } finally {
-      maybeStopCluster(sliderClient, null, "end of flex test run");
+  public boolean flexCluster(
+      SliderClient sliderClient,
+      String clustername,
+      int masterFlexTarget,
+      int workerFlexTarget,
+      boolean testHBaseAfter) {
+    int flexTarget
+    describe(
+        "Flexing  masters -> $masterFlexTarget ; workers -> ${workerFlexTarget}");
+    boolean flexed;
+    flexed = 0 == sliderClient.flex(clustername,
+        [
+            (ROLE_WORKER): workerFlexTarget,
+            (ROLE_MASTER): masterFlexTarget
+        ]
+    );
+    waitForWorkerInstanceCount(
+        sliderClient,
+        workerFlexTarget,
+        hbaseClusterStartupToLiveTime);
+    waitForSliderMasterCount(sliderClient, masterFlexTarget,
+        hbaseClusterStartupToLiveTime);
+
+    if (testHBaseAfter) {
+      waitForHBaseRegionServerCount(sliderClient, clustername, workerFlexTarget,
+          hbaseClusterStartupToLiveTime);
     }
-
+    flexed
   }
 
   /**
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
index c9e0b24..85726a7 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestFreezeThawClusterFromArchive.groovy
@@ -37,9 +37,8 @@
 
   @Test
   public void testFreezeThawClusterFromArchive() throws Throwable {
-    String clustername = "test_freeze_thaw_cluster_from_archive"
     int regionServerCount = 1
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
     switchToImageDeploy = true
     ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername, regionServerCount, [], true, true)
     SliderClient sliderClient = launcher.service
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
index e5bfb66..22fa4c7 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchive.groovy
@@ -37,8 +37,8 @@
 
   @Test
   public void testLiveClusterFromArchive() throws Throwable {
-    String clustername = testClusterName
     int regionServerCount = 1
+    String clustername = testClusterName
     createMiniCluster(clustername,
         configuration,
         regionServerCount + 1,
@@ -66,7 +66,7 @@
   }
 
   public String getTestClusterName() {
-    return "test_live_cluster_from_archive"
+    return "testliveclusterfromarchive"
   }
 
   public boolean startHDFS() {
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
index 2245e2c..cecee3f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/archives/TestLiveClusterFromArchiveOnHDFS.groovy
@@ -30,7 +30,7 @@
 
   @Override
   String getTestClusterName() {
-    "test_live_cluster_from_archiveonhdfs"
+    "testliveclusterfromarchiveonhdfs"
   }
 
   @Override
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
index a2af619..c305b5b 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/build/TestBuildThawClusterM1W1.groovy
@@ -38,8 +38,7 @@
 
   @Test
   public void test_build_thaw_cluster_m1_w1() throws Throwable {
-    String clustername = "test_build_thaw_cluster_m1_w1"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that a built cluster can be thawed"
 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
index 16cdef6..fe739c4 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailedRegionService.groovy
@@ -39,19 +39,18 @@
 
   @Test
   public void testFailedRegionService() throws Throwable {
-    testRegionService("test_failed_region_service", true)
+    testRegionService("", true)
   }
   
   @Test
   public void testStoppedRegionService() throws Throwable {
-    testRegionService("test_stopped_region_service", false)
+    testRegionService("", false)
   }
   
   private void testRegionService(String testName, boolean toKill) {
-    String clustername = testName
     String action = toKill ? "kill" : "stop"
     int regionServerCount = 2
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    String clustername = createMiniCluster(testName, configuration, 1, 1, 1, true, true)
     describe("Create a single region service cluster then " + action + " the RS");
 
     //now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
index 2237c5d..d219636 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseAM.groovy
@@ -49,7 +49,6 @@
   public void testKilledHBaseAM() throws Throwable {
     skip("SLIDER-66: AM Restart Failing -YARN issues")
     
-    String clustername = "test_killed_hbase_am"
     int regionServerCount = 1
 
 
@@ -58,7 +57,7 @@
     conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, 3)
 
     conf.set(YarnConfiguration.RM_SCHEDULER, FIFO_SCHEDULER);
-    createMiniCluster(clustername, conf, 1, 1, 1, true, false)
+    String clustername = createMiniCluster("", conf, 1, 1, 1, true, false)
     describe(" Kill the AM, expect cluster to die");
 
     //now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
index 35ed129..1e19d71 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestKilledHBaseMaster.groovy
@@ -40,9 +40,9 @@
 
   @Test
   public void testKilledHBaseMaster() throws Throwable {
-    String clustername = "test_killed_hbase_master"
     int regionServerCount = 1
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, true)
     describe("Kill the hbase master and expect a restart");
 
     //now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
similarity index 65%
rename from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy
rename to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
index e43ad81..eb44ae0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestFailureThreshold.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
@@ -23,10 +23,10 @@
 import org.apache.hadoop.hbase.ClusterStatus
 import org.apache.hadoop.yarn.api.records.ApplicationReport
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
+import org.apache.slider.api.ResourceKeys
 import org.apache.slider.core.main.ServiceLauncher
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.api.ClusterDescription
-import org.apache.slider.api.OptionKeys
 import org.apache.slider.core.exceptions.BadClusterStateException
 import org.apache.slider.core.exceptions.ErrorStrings
 import org.apache.slider.common.params.Arguments
@@ -34,6 +34,8 @@
 import org.apache.slider.providers.hbase.minicluster.HBaseMiniClusterTestBase
 import org.junit.Test
 
+import static org.apache.slider.providers.hbase.HBaseKeys.ROLE_WORKER
+
 /**
  * test that if a container is killed too many times,
  * the AM stays down
@@ -41,39 +43,61 @@
 @CompileStatic
 @Slf4j
 
-class TestFailureThreshold extends HBaseMiniClusterTestBase {
+class TestRegionServerFailureThreshold extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testFailedRegionService() throws Throwable {
-    failureThresholdTestRun("test_failure_threshold", true, 2, 5)
+  public void testRegionServerFailureThreshold() throws Throwable {
+    failureThresholdTestRun("", true, 2, 5)
   }
 
-
-  
+  /**
+   * Sets the failure threshold then runs the #of kill attempts
+   * @param testName
+   * @param toKill
+   * @param threshold
+   * @param killAttempts
+   */
   private void failureThresholdTestRun(
       String testName,
       boolean toKill,
       int threshold,
       int killAttempts) {
-    String clustername = testName
     String action = toKill ? "kill" : "stop"
-    int regionServerCount = 2
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    int regionServerCount = 1
+    String clustername = createMiniCluster(testName, configuration, 1, 1, 1, true, true)
     describe(
-        "Create a single region service cluster then " + action + " the RS");
+        "Create a single region service HBase instance" +
+        "then $action the RS $killAttempts times with a threshold of $threshold");
 
     //now launch the cluster
+    def globalThreshold = threshold - 1
     ServiceLauncher<SliderClient> launcher = createHBaseCluster(
         clustername,
         regionServerCount,
         [
-            Arguments.ARG_OPTION, OptionKeys.INTERNAL_CONTAINER_FAILURE_THRESHOLD,
-            Integer.toString(threshold)
+            Arguments.ARG_RES_COMP_OPT,
+            ROLE_WORKER,
+            ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+            Integer.toString(threshold),
+
+            Arguments.ARG_RESOURCE_OPT, 
+            ResourceKeys.CONTAINER_FAILURE_THRESHOLD,
+            Integer.toString(globalThreshold)
         ],
         true,
         true)
     SliderClient client = launcher.service
     addToTeardown(client);
+    def aggregateConf = client.loadPersistedClusterDescription(clustername)
+    log.info aggregateConf.toString()
+
+    def resourceOperations = aggregateConf.resourceOperations
+    def failureOptValue = resourceOperations.globalOptions.getMandatoryOptionInt(
+        ResourceKeys.CONTAINER_FAILURE_THRESHOLD)
+    assert globalThreshold == failureOptValue
+    def workerThreshold = resourceOperations.getComponentOptInt(ROLE_WORKER,
+        ResourceKeys.CONTAINER_FAILURE_THRESHOLD, 0)
+    assert threshold == workerThreshold
     ClusterDescription status = client.getClusterDescription(clustername)
 
     ClusterStatus clustat = basicHBaseClusterStartupSequence(client)
@@ -109,7 +133,7 @@
         describe("waiting for recovery")
 
         //and expect a recovery 
-        if (restarts < threshold) {
+        if (restarts <= threshold) {
 
           def restartTime = 1000
           status = waitForWorkerInstanceCount(
@@ -125,20 +149,30 @@
           //expect the cluster to have failed
           try {
             def finalCD = client.getClusterDescription(clustername)
-            dumpClusterDescription("expected the AM to have failed", finalCD)
+            describe( "failure threshold ignored")
+            dumpClusterDescription("expected the cluster to have failed", finalCD)
+            describe "stopping cluster"
+            maybeStopCluster(
+                client,
+                "",
+                "stopping cluster that isn't failing correctly")
+            
+            
             fail("AM had not failed after $restarts worker kills")
             
           } catch (BadClusterStateException e) {
-            assert e.toString().contains(ErrorStrings.E_APPLICATION_NOT_RUNNING)
-            assert e.exitCode == SliderExitCodes.EXIT_BAD_STATE
+            assertExceptionDetails(e,
+                SliderExitCodes.EXIT_BAD_STATE,
+                ErrorStrings.E_APPLICATION_NOT_RUNNING)
             //success
             break;
           }
         }
       }
     } catch (BadClusterStateException e) {
-      assert e.toString().contains(ErrorStrings.E_APPLICATION_NOT_RUNNING)
-      assert e.exitCode == SliderExitCodes.EXIT_BAD_STATE
+      assertExceptionDetails(e,
+          SliderExitCodes.EXIT_BAD_STATE, 
+          ErrorStrings.E_APPLICATION_NOT_RUNNING)
     }
     ApplicationReport report = client.applicationReport
     log.info(report.diagnostics)
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
index 3a66873..ec8d264 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex0To1.groovy
@@ -30,7 +30,7 @@
 
   @Test
   public void testClusterFlex0To1() throws Throwable {
-    assert flexHBaseClusterTestRun("test_cluster_flex_0to1", 1, 1, 0, 1, false)
+    assert flexHBaseClusterTestRun("", 1, 1, 0, 1, false)
   }
 
 }
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
index ba1bb94..fdbbce8 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To1.groovy
@@ -32,9 +32,9 @@
 class TestClusterFlex1To1 extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testClusterFlexPersistent() throws Throwable {
+  public void testClusterFlex1To1() throws Throwable {
     assert !flexHBaseClusterTestRun(
-        "test_cluster_flex_1to1",
+        "",
         1,
         1,
         1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
index fd8e1ae..be38c3d 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex1To2.groovy
@@ -31,9 +31,9 @@
 class TestClusterFlex1To2 extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testClusterFlex() throws Throwable {
+  public void testClusterFlex1To2() throws Throwable {
     assert flexHBaseClusterTestRun(
-        "test_cluster_flex_1to2",
+        "",
         1,
         1,
         1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
index c76a9d3..e36e067 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2DownTo1.groovy
@@ -34,7 +34,7 @@
   @Test
   public void testClusterFlex2DownTo1() throws Throwable {
     assert flexHBaseClusterTestRun(
-        "test_cluster_flex_2_down_to_1",
+        "",
         1, 1,
         2,
         1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
index 39c1cac..683c02a 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlex2To5.groovy
@@ -31,9 +31,9 @@
 class TestClusterFlex2To5 extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testClusterFlex() throws Throwable {
+  public void testClusterFlex2To5() throws Throwable {
     assert flexHBaseClusterTestRun(
-        "test_cluster_flex_2to5",
+        "",
         1,
         1,
         2,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
similarity index 72%
copy from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
copy to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
index 0910c6e..97a9b35 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownMultiple.groovy
@@ -20,6 +20,7 @@
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.slider.client.SliderClient
 import org.apache.slider.providers.hbase.minicluster.HBaseMiniClusterTestBase
 import org.junit.Test
 
@@ -28,17 +29,29 @@
  */
 @CompileStatic
 @Slf4j
-class TestHMasterFlex1To2 extends HBaseMiniClusterTestBase {
+class TestClusterFlexDownMultiple extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testClusterFlex() throws Throwable {
-    assert flexHBaseClusterTestRun(
-        "test_hmaster_flex_1to2",
+  public void testClusterFlexDownMultiple() throws Throwable {
+    def clusterName = createClusterName();
+    SliderClient sliderClient = startHBaseCluster(clusterName, 1, 3)
+    
+    assert flexCluster(
+        sliderClient,
+        clusterName,
         1,
         2,
+        true)
+
+    assert flexCluster(
+        sliderClient,
+        clusterName,
         1,
         1,
         true)
+    
+
+    
   }
 
 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
index 5b392e8..c43c5bd 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexDownToZero.groovy
@@ -33,7 +33,7 @@
   @Test
   public void testClusterFlexDownToZero() throws Throwable {
     assert flexHBaseClusterTestRun(
-        "test_cluster_flex_down_to_zero",
+        "",
         1,
         1,
         1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
similarity index 89%
rename from slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
rename to slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
index 0910c6e..298f40f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestHMasterFlex1To2.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/flexing/TestClusterFlexHMasterFlex1To2.groovy
@@ -28,12 +28,12 @@
  */
 @CompileStatic
 @Slf4j
-class TestHMasterFlex1To2 extends HBaseMiniClusterTestBase {
+class TestClusterFlexHMasterFlex1To2 extends HBaseMiniClusterTestBase {
 
   @Test
-  public void testClusterFlex() throws Throwable {
+  public void testClusterMasterFlex1To2() throws Throwable {
     assert flexHBaseClusterTestRun(
-        "test_hmaster_flex_1to2",
+        "",
         1,
         2,
         1,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
index 62e4d4b..f6748e0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeReconfigureThawLiveRegionService.groovy
@@ -25,9 +25,9 @@
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hbase.ClusterStatus
 import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.api.InternalKeys
 import org.apache.slider.core.main.ServiceLauncher
 import org.apache.slider.api.ClusterDescription
-import org.apache.slider.api.OptionKeys
 import org.apache.slider.core.build.InstanceIO
 import org.apache.slider.providers.hbase.HBaseKeys
 import org.apache.slider.common.tools.ConfigHelper
@@ -47,13 +47,12 @@
 
   @Test
   public void testFreezeReconfigureThawLiveRegionService() throws Throwable {
-    String clustername = "test_freeze_reconfigure_thaw_live_regionservice"
     int regionServerCount = 4
     int nodemanagers = 3
     YarnConfiguration conf = configuration
     //one vcore per node
     conf.setInt("yarn.nodemanager.resource.cpu-vcores", 1)
-    createMiniCluster(clustername, conf, nodemanagers, true)
+    String clustername = createMiniCluster("", conf, nodemanagers, true)
     describe(
         "Create a $regionServerCount node cluster, freeze it, patch the configuration files," +
         " thaw it and verify that it came back with the new settings")
@@ -94,7 +93,7 @@
         clusterDir)
 
     def snapshotPath = instanceDefinition.internalOperations.get(
-        OptionKeys.INTERNAL_SNAPSHOT_CONF_PATH)
+        InternalKeys.INTERNAL_SNAPSHOT_CONF_PATH)
     assert snapshotPath != null
 
     Path confdir = new Path(snapshotPath);
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
index f9d460e..66dd4f0 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/freezethaw/TestFreezeThawLiveRegionService.groovy
@@ -41,9 +41,8 @@
 
   @Test
   public void testFreezeThawLiveRegionService() throws Throwable {
-    String clustername = "test_freeze_thaw_live_regionservice"
     int regionServerCount = 2
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
     describe("Create a cluster, freeze it, thaw it and verify that it came back ")
     //use a smaller AM HEAP to include it in the test cycle
     ServiceLauncher launcher = createHBaseCluster(clustername, regionServerCount,
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
index c2652b8..47f5e1c 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/Test2Master2RS.groovy
@@ -41,9 +41,8 @@
   @Test
   public void test2Master2RS() throws Throwable {
 
-    String clustername = "test2master2rs"
     int regionServerCount = 2
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+    String clustername = createMiniCluster("", configuration, 1, 1, 1, true, false)
 
     describe(" Create a two master, two region service cluster");
     //now launch the cluster
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
index 97714d6..634ebe2 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMaster.groovy
@@ -46,8 +46,7 @@
 
   @Test
   public void testHBaseMaster() throws Throwable {
-    String clustername = "test_hbase_master"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
     //make sure that ZK is up and running at the binding string
     ZKIntegration zki = createZKIntegrationInstance(ZKBinding, clustername, false, false, 5000)
     //now launch the cluster with 1 region server
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
index fffd6c0..7f51f95 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterOnHDFS.groovy
@@ -35,8 +35,8 @@
 
   @Test
   public void testHBaseMasteOnHDFS() throws Throwable {
-    String clustername = "test_hbase_master_on_hdfs"
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, true)
     log.info("HDFS is at $fsDefaultName")
     assert fsDefaultName.startsWith("hdfs://")
     ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername, 1, [], true, true) 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
index 9ca5f45..aeb3c47 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestHBaseMasterWithBadHeap.groovy
@@ -42,8 +42,7 @@
 
   @Test
   public void testHBaseMasterWithBadHeap() throws Throwable {
-    String clustername = "test_hbase_master_with_bad_heap"
-    createMiniCluster(clustername, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that bad Java heap options are picked up"
     //now launch the cluster with 1 region server
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
index 26292fb..957d167 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveRegionServiceOnHDFS.groovy
@@ -37,9 +37,9 @@
 
   @Test
   public void testLiveRegionServiceOnHDFS() throws Throwable {
-    String clustername = "test_live_region_service_on_hdfs"
     int regionServerCount = 1
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, true)
     describe(" Create a single region service cluster");
 
     //make sure that ZK is up and running at the binding string
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
index 9b4b8a7..3561d2f 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestLiveTwoNodeRegionService.groovy
@@ -41,9 +41,9 @@
   @Test
   public void testLiveTwoNodeRegionService() throws Throwable {
     
-    String clustername = "test_live_two_node_regionservice"
     int regionServerCount = 2
-    createMiniCluster(clustername, configuration, 1, 1, 1, true, false)
+    String clustername = createMiniCluster(
+        "", configuration, 1, 1, 1, true, false)
 
     describe(" Create a two node region service cluster");
 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
index 0140030..7e4c5ed 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/live/TestTwoLiveClusters.groovy
@@ -42,10 +42,9 @@
    */
   @Test
   public void testTwoLiveClusters() throws Throwable {
-    def name = "test_two_live_clusters"
-    createMiniCluster(name, configuration, 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
-    String clustername1 = name + "-1"
+    String clustername1 = clustername + "-1"
     //now launch the cluster
     int regionServerCount = 1
     ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername1, regionServerCount, [], true, true) 
@@ -62,7 +61,7 @@
     waitForHBaseRegionServerCount(sliderClient, clustername1, 1, hbaseClusterStartupToLiveTime)
 
     //now here comes cluster #2
-    String clustername2 = name + "-2"
+    String clustername2 = clustername + "-2"
 
 
     String zkpath = "/$clustername2"
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
index c44de16..cffde39 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestRoleOptPropagation.groovy
@@ -42,8 +42,7 @@
   
   public void testRoleOptPropagation() throws Throwable {
     skip("Disabled")
-    String clustername = "test_role_opt_propagation"
-    createMiniCluster(clustername, getConfiguration(), 1, true)
+    String clustername = createMiniCluster("", getConfiguration(), 1, true)
 
     describe "verify that role options propagate down to deployed roles"
 
@@ -71,8 +70,7 @@
 
   @Test
   public void testUnknownRole() throws Throwable {
-    String clustername = "test_unknown_role"
-    createMiniCluster(clustername, getConfiguration(), 1, true)
+    String clustername = createMiniCluster("", configuration, 1, true)
 
     describe "verify that unknown role results in cluster creation failure"
     try {
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
index 2cdf2bb..c5e0282 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/masterless/TestSliderConfDirToMasterlessAM.groovy
@@ -46,9 +46,8 @@
 
   @Test
   public void testSliderConfDirToMasterlessAM() throws Throwable {
-    String clustername = "test_slider_conf_dir_to_masterless_am"
     YarnConfiguration conf = configuration
-    createMiniCluster(clustername, conf, 1, true)
+    String clustername = createMiniCluster("", conf, 1, true)
 
     describe "verify that a conf dir will propagate via the sytem proerpty"
 
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties b/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
index a552a55..8f633b2 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/resources/log4j.properties
@@ -42,6 +42,7 @@
 log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
 log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
 log4j.logger.org.apache.hadoop.hdfs=WARN
+log4j.logger.BlockStateChange=WARN
 
 
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
@@ -57,3 +58,4 @@
 log4j.logger.org.apache.hadoop.yarn.util.AbstractLivelinessMonitor=WARN
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.security=WARN
 log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMNMInfo=WARN
+log4j.logger.org.apache.hadoop.hbase.client.HConnectionManager=WARN
\ No newline at end of file
diff --git a/src/test/clusters/c6401/slider/log4j.properties b/src/test/clusters/c6401/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/c6401/slider/log4j.properties
+++ b/src/test/clusters/c6401/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#   
-#    http://www.apache.org/licenses/LICENSE-2.0
-#   
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License. See accompanying LICENSE file.
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
 # log4j configuration used during build and unit tests
 
diff --git a/src/test/clusters/morzine/slider/log4j.properties b/src/test/clusters/morzine/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/morzine/slider/log4j.properties
+++ b/src/test/clusters/morzine/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#   
-#    http://www.apache.org/licenses/LICENSE-2.0
-#   
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License. See accompanying LICENSE file.
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
 # log4j configuration used during build and unit tests
 
diff --git a/src/test/clusters/offline/slider/log4j.properties b/src/test/clusters/offline/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/offline/slider/log4j.properties
+++ b/src/test/clusters/offline/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#   
-#    http://www.apache.org/licenses/LICENSE-2.0
-#   
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License. See accompanying LICENSE file.
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
 # log4j configuration used during build and unit tests
 
diff --git a/src/test/clusters/offline/slider/slider-client.xml b/src/test/clusters/offline/slider/slider-client.xml
index 8385086..25c01cf 100644
--- a/src/test/clusters/offline/slider/slider-client.xml
+++ b/src/test/clusters/offline/slider/slider-client.xml
@@ -38,11 +38,6 @@
   </property>
 
   <property>
-    <name>slider.funtest.enabled</name>
-    <value>false</value>
-  </property>
-
-  <property>
     <name>yarn.application.classpath</name>
     <value>
       /etc/hadoop/conf,/usr/lib/hadoop/*,/usr/lib/hadoop/lib/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-hdfs/lib/*,/usr/lib/hadoop-yarn/*,/usr/lib/hadoop-yarn/lib/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-mapreduce/lib/*
@@ -62,12 +57,6 @@
   </property>
  
   <property>
-    <name>slider.test.zkhosts</name>
-    <description>list of the zookeeper hosts</description>
-    <value></value>
-  </property>
-  
-  <property>
     <name>slider.test.accumulo.enabled</name>
     <description>Flag to enable/disable Accumulo tests</description>
     <value>false</value>
diff --git a/src/test/clusters/remote/slider/log4j.properties b/src/test/clusters/remote/slider/log4j.properties
index f672472..5b59190 100644
--- a/src/test/clusters/remote/slider/log4j.properties
+++ b/src/test/clusters/remote/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#   
-#    http://www.apache.org/licenses/LICENSE-2.0
-#   
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License. See accompanying LICENSE file.
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
 # log4j configuration used during build and unit tests
 
diff --git a/src/test/clusters/sandbox/slider/log4j.properties b/src/test/clusters/sandbox/slider/log4j.properties
index d814f14..4682a96 100644
--- a/src/test/clusters/sandbox/slider/log4j.properties
+++ b/src/test/clusters/sandbox/slider/log4j.properties
@@ -1,15 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#   
-#    http://www.apache.org/licenses/LICENSE-2.0
-#   
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License. See accompanying LICENSE file.
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 #
 # log4j configuration used during build and unit tests
 
diff --git a/src/test/clusters/sandbox/slider/slider-client.xml b/src/test/clusters/sandbox/slider/slider-client.xml
index 30937ec..5ac5d59 100644
--- a/src/test/clusters/sandbox/slider/slider-client.xml
+++ b/src/test/clusters/sandbox/slider/slider-client.xml
@@ -52,7 +52,7 @@
   <property>
     <name>slider.test.agent.enabled</name>
     <description>Flag to enable/disable Agent tests</description>
-    <value>false</value>
+    <value>true</value>
   </property>